使用 Vertex AI 上的生成式 AI 创建嵌入

使用生成式 AI 构建嵌入

深入探索

如需查看包含此代码示例的详细文档,请参阅以下内容:

代码示例

Go

在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Go 设置说明执行操作。 如需了解详情,请参阅 Vertex AI Go API 参考文档

如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

import (
	"context"
	"fmt"
	"regexp"

	aiplatform "cloud.google.com/go/aiplatform/apiv1"
	"cloud.google.com/go/aiplatform/apiv1/aiplatformpb"

	"google.golang.org/api/option"
	"google.golang.org/protobuf/types/known/structpb"
)

func embedTextsPreview(
	apiEndpoint, project, model string, texts []string,
	task string, dimensionality *int) ([][]float32, error) {
	ctx := context.Background()

	client, err := aiplatform.NewPredictionClient(ctx, option.WithEndpoint(apiEndpoint))
	if err != nil {
		return nil, err
	}
	defer client.Close()

	match := regexp.MustCompile(`^(\w+-\w+)`).FindStringSubmatch(apiEndpoint)
	location := "us-central1"
	if match != nil {
		location = match[1]
	}
	endpoint := fmt.Sprintf("projects/%s/locations/%s/publishers/google/models/%s", project, location, model)
	instances := make([]*structpb.Value, len(texts))
	for i, text := range texts {
		instances[i] = structpb.NewStructValue(&structpb.Struct{
			Fields: map[string]*structpb.Value{
				"content":   structpb.NewStringValue(text),
				"task_type": structpb.NewStringValue(task),
			},
		})
	}
	outputDimensionality := structpb.NewNullValue()
	if dimensionality != nil {
		outputDimensionality = structpb.NewNumberValue(float64(*dimensionality))
	}
	params := structpb.NewStructValue(&structpb.Struct{
		Fields: map[string]*structpb.Value{"outputDimensionality": outputDimensionality},
	})

	req := &aiplatformpb.PredictRequest{
		Endpoint:   endpoint,
		Instances:  instances,
		Parameters: params,
	}
	resp, err := client.Predict(ctx, req)
	if err != nil {
		return nil, err
	}
	embeddings := make([][]float32, len(resp.Predictions))
	for i, prediction := range resp.Predictions {
		values := prediction.GetStructValue().Fields["embeddings"].GetStructValue().Fields["values"].GetListValue().Values
		embeddings[i] = make([]float32, len(values))
		for j, value := range values {
			embeddings[i][j] = float32(value.GetNumberValue())
		}
	}
	return embeddings, nil
}

Java

在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Java 设置说明执行操作。 如需了解详情,请参阅 Vertex AI Java API 参考文档

如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

import static java.util.stream.Collectors.toList;

import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.PredictRequest;
import com.google.cloud.aiplatform.v1.PredictResponse;
import com.google.cloud.aiplatform.v1.PredictionServiceClient;
import com.google.cloud.aiplatform.v1.PredictionServiceSettings;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.OptionalInt;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PredictTextEmbeddingsSample {
  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    // Details about text embedding request structure and supported models are available in:
    // https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings
    String endpoint = "us-central1-aiplatform.googleapis.com:443";
    String project = "YOUR_PROJECT_ID";
    String model = "text-embedding-004";
    predictTextEmbeddings(
        endpoint,
        project,
        model,
        List.of("banana bread?", "banana muffins?"),
        "QUESTION_ANSWERING",
        OptionalInt.of(256));
  }

  // Gets text embeddings from a pretrained, foundational model.
  public static List<List<Float>> predictTextEmbeddings(
      String endpoint,
      String project,
      String model,
      List<String> texts,
      String task,
      OptionalInt outputDimensionality)
      throws IOException {
    PredictionServiceSettings settings =
        PredictionServiceSettings.newBuilder().setEndpoint(endpoint).build();
    Matcher matcher = Pattern.compile("^(?<Location>\\w+-\\w+)").matcher(endpoint);
    String location = matcher.matches() ? matcher.group("Location") : "us-central1";
    EndpointName endpointName =
        EndpointName.ofProjectLocationPublisherModelName(project, location, "google", model);

    // You can use this prediction service client for multiple requests.
    try (PredictionServiceClient client = PredictionServiceClient.create(settings)) {
      PredictRequest.Builder request =
          PredictRequest.newBuilder().setEndpoint(endpointName.toString());
      if (outputDimensionality.isPresent()) {
        request.setParameters(
            Value.newBuilder()
                .setStructValue(
                    Struct.newBuilder()
                        .putFields("outputDimensionality", valueOf(outputDimensionality.getAsInt()))
                        .build()));
      }
      for (int i = 0; i < texts.size(); i++) {
        request.addInstances(
            Value.newBuilder()
                .setStructValue(
                    Struct.newBuilder()
                        .putFields("content", valueOf(texts.get(i)))
                        .putFields("task_type", valueOf(task))
                        .build()));
      }
      PredictResponse response = client.predict(request.build());
      List<List<Float>> floats = new ArrayList<>();
      for (Value prediction : response.getPredictionsList()) {
        Value embeddings = prediction.getStructValue().getFieldsOrThrow("embeddings");
        Value values = embeddings.getStructValue().getFieldsOrThrow("values");
        floats.add(
            values.getListValue().getValuesList().stream()
                .map(Value::getNumberValue)
                .map(Double::floatValue)
                .collect(toList()));
      }
      return floats;
    }
  }

  private static Value valueOf(String s) {
    return Value.newBuilder().setStringValue(s).build();
  }

  private static Value valueOf(int n) {
    return Value.newBuilder().setNumberValue(n).build();
  }
}

Node.js

在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Node.js 设置说明执行操作。 如需了解详情,请参阅 Vertex AI Node.js API 参考文档

如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

async function main(
  project,
  model = 'text-embedding-004',
  texts = 'banana bread?;banana muffins?',
  task = 'QUESTION_ANSWERING',
  dimensionality = 0,
  apiEndpoint = 'us-central1-aiplatform.googleapis.com'
) {
  const aiplatform = require('@google-cloud/aiplatform');
  const {PredictionServiceClient} = aiplatform.v1;
  const {helpers} = aiplatform; // helps construct protobuf.Value objects.
  const clientOptions = {apiEndpoint: apiEndpoint};
  const location = 'us-central1';
  const endpoint = `projects/${project}/locations/${location}/publishers/google/models/${model}`;

  async function callPredict() {
    const instances = texts
      .split(';')
      .map(e => helpers.toValue({content: e, task_type: task}));
    const parameters = helpers.toValue(
      dimensionality > 0 ? {outputDimensionality: parseInt(dimensionality)} : {}
    );
    const request = {endpoint, instances, parameters};
    const client = new PredictionServiceClient(clientOptions);
    const [response] = await client.predict(request);
    const predictions = response.predictions;
    const embeddings = predictions.map(p => {
      const embeddingsProto = p.structValue.fields.embeddings;
      const valuesProto = embeddingsProto.structValue.fields.values;
      return valuesProto.listValue.values.map(v => v.numberValue);
    });
    console.log('Got embeddings: \n' + JSON.stringify(embeddings));
  }

  callPredict();
}

Python

在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Python 设置说明执行操作。 如需了解详情,请参阅 Vertex AI Python API 参考文档

如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证

from typing import List, Optional

from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel


def embed_text(
    texts: List[str] = ["banana muffins? ", "banana bread? banana muffins?"],
    task: str = "RETRIEVAL_DOCUMENT",
    model_name: str = "text-embedding-004",
    dimensionality: Optional[int] = 256,
) -> List[List[float]]:
    """Embeds texts with a pre-trained, foundational model."""
    model = TextEmbeddingModel.from_pretrained(model_name)
    inputs = [TextEmbeddingInput(text, task) for text in texts]
    kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
    embeddings = model.get_embeddings(inputs, **kwargs)
    return [embedding.values for embedding in embeddings]

后续步骤

如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器