使用嵌入模型获取文本片段的文本嵌入。
深入探索
如需查看包含此代码示例的详细文档，请参阅以下内容：
代码示例
C#
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 C# 设置说明执行操作。如需了解详情，请参阅 Vertex AI C# API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
using Google.Cloud.AIPlatform.V1;
using System;
using System.Collections.Generic;
using System.Linq;
using Value = Google.Protobuf.WellKnownTypes.Value;
/// <summary>
/// Sample that sends a single text prompt to a Vertex AI publisher embedding model
/// and reports the length of the embedding vector that comes back.
/// </summary>
public class PredictTextEmbeddingsSample
{
    /// <summary>
    /// Requests an embedding for the fixed prompt "What is life?".
    /// </summary>
    /// <param name="projectId">Google Cloud project that owns the request.</param>
    /// <param name="locationId">Region used for both the API host name and the resource path.</param>
    /// <param name="publisher">Publisher segment of the model resource name.</param>
    /// <param name="model">Model segment of the model resource name.</param>
    /// <returns>The number of entries in the first prediction's embedding vector.</returns>
    public int PredictTextEmbeddings(
        string projectId = "your-project-id",
        string locationId = "us-central1",
        string publisher = "google",
        string model = "textembedding-gecko@001"
    )
    {
        // The client talks to the regional host derived from locationId.
        // It only needs to be created once and can be reused for multiple requests.
        var regionalHost = $"{locationId}-aiplatform.googleapis.com";
        var predictionClient = new PredictionServiceClientBuilder
        {
            Endpoint = regionalHost
        }.Build();

        // Resource name of the publisher model that will serve the request.
        var modelEndpoint = EndpointName.FromProjectLocationPublisherModel(projectId, locationId, publisher, model);

        // A single instance: a struct whose "content" field carries the text to embed.
        var prompt = Value.ForStruct(new()
        {
            Fields =
            {
                ["content"] = Value.ForString("What is life?"),
            }
        });
        var instances = new List<Value> { prompt };

        // Send the request without any extra parameters.
        var response = predictionClient.Predict(modelEndpoint, instances, null);

        // Walk first prediction -> "embeddings" -> "values" to reach the vector itself.
        var firstPrediction = response.Predictions.First();
        var embeddings = firstPrediction.StructValue.Fields["embeddings"];
        var vector = embeddings.StructValue.Fields["values"].ListValue.Values;

        var vectorLength = vector.Count;
        Console.WriteLine($"Length of embedding vector: {vectorLength}");
        return vectorLength;
    }
}
Java
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Java 设置说明执行操作。如需了解详情，请参阅 Vertex AI Java API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
import static java.util.stream.Collectors.toList;
import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.PredictRequest;
import com.google.cloud.aiplatform.v1.PredictResponse;
import com.google.cloud.aiplatform.v1.PredictionServiceClient;
import com.google.cloud.aiplatform.v1.PredictionServiceSettings;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.OptionalInt;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Sample showing how to request text embeddings from a Vertex AI publisher model through the
 * prediction service.
 */
public class PredictTextEmbeddingsSample {
  /** Matches the leading {@code <word>-<word>} segment of an API host, e.g. {@code us-central1}. */
  private static final Pattern LOCATION_PREFIX = Pattern.compile("^(?<Location>\\w+-\\w+)");

  /** Location used when none can be derived from the API endpoint. */
  private static final String DEFAULT_LOCATION = "us-central1";

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    // Details about text embedding request structure and supported models are available in:
    // https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings
    String endpoint = "us-central1-aiplatform.googleapis.com:443";
    String project = "YOUR_PROJECT_ID";
    String model = "text-embedding-004";
    predictTextEmbeddings(
        endpoint,
        project,
        model,
        List.of("banana bread?", "banana muffins?"),
        "QUESTION_ANSWERING",
        OptionalInt.of(256));
  }

  /**
   * Gets text embeddings from a pretrained, foundational model.
   *
   * @param endpoint API host (optionally with port) the client connects to; its leading
   *     {@code <word>-<word>} segment is also used as the location in the model resource name
   * @param project project ID placed in the model resource name
   * @param model model ID under the {@code google} publisher
   * @param texts texts to embed; one request instance is created per entry
   * @param task value sent as the {@code taskType} field of every instance
   * @param outputDimensionality when present, sent as the {@code outputDimensionality} request
   *     parameter; when empty, no parameters are set on the request
   * @return one list of floats per prediction in the response, in response order
   * @throws IOException if the prediction service client cannot be created
   */
  public static List<List<Float>> predictTextEmbeddings(
      String endpoint,
      String project,
      String model,
      List<String> texts,
      String task,
      OptionalInt outputDimensionality)
      throws IOException {
    PredictionServiceSettings settings =
        PredictionServiceSettings.newBuilder().setEndpoint(endpoint).build();
    EndpointName endpointName =
        EndpointName.ofProjectLocationPublisherModelName(
            project, locationOf(endpoint), "google", model);

    // You can use this prediction service client for multiple requests.
    try (PredictionServiceClient client = PredictionServiceClient.create(settings)) {
      PredictRequest.Builder request =
          PredictRequest.newBuilder().setEndpoint(endpointName.toString());
      if (outputDimensionality.isPresent()) {
        request.setParameters(
            Value.newBuilder()
                .setStructValue(
                    Struct.newBuilder()
                        .putFields("outputDimensionality", valueOf(outputDimensionality.getAsInt()))
                        .build()));
      }
      for (String text : texts) {
        request.addInstances(
            Value.newBuilder()
                .setStructValue(
                    Struct.newBuilder()
                        .putFields("content", valueOf(text))
                        .putFields("taskType", valueOf(task))
                        .build()));
      }

      PredictResponse response = client.predict(request.build());

      // Each prediction is read as a struct: embeddings -> values -> list of numbers.
      List<List<Float>> floats = new ArrayList<>();
      for (Value prediction : response.getPredictionsList()) {
        Value embeddings = prediction.getStructValue().getFieldsOrThrow("embeddings");
        Value values = embeddings.getStructValue().getFieldsOrThrow("values");
        floats.add(
            values.getListValue().getValuesList().stream()
                .map(Value::getNumberValue)
                .map(Double::floatValue)
                .collect(toList()));
      }
      return floats;
    }
  }

  /**
   * Derives the location from the start of the API endpoint, falling back to {@code us-central1}.
   *
   * <p>Uses {@code lookingAt()} rather than {@code matches()}: the pattern only describes the
   * prefix of the host, so requiring the whole endpoint string to match would never succeed for
   * a host such as {@code us-central1-aiplatform.googleapis.com:443} and the fallback would
   * always be used.
   */
  private static String locationOf(String endpoint) {
    Matcher matcher = LOCATION_PREFIX.matcher(endpoint);
    return matcher.lookingAt() ? matcher.group("Location") : DEFAULT_LOCATION;
  }

  /** Wraps a string in a protobuf {@code Value}. */
  private static Value valueOf(String s) {
    return Value.newBuilder().setStringValue(s).build();
  }

  /** Wraps an int in a protobuf {@code Value} as a number. */
  private static Value valueOf(int n) {
    return Value.newBuilder().setNumberValue(n).build();
  }
}
Node.js
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Node.js 设置说明执行操作。如需了解详情，请参阅 Vertex AI Node.js API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
/**
 * Requests text embeddings from a Vertex AI publisher model and logs each embedding vector.
 *
 * @param {string} project Project ID placed in the model resource name.
 * @param {string} model Model ID under the `google` publisher.
 * @param {string} texts Texts to embed, separated by `;`; one instance is sent per piece.
 * @param {string} task Value sent as the `taskType` field of every instance.
 * @param {number|string} outputDimensionality Requested embedding size; values that are not
 *     greater than zero fall back to 256.
 * @param {string} apiEndpoint API host to connect to; its leading `<word>-<word>` segment is
 *     used as the location in the model resource name.
 * @returns {Promise<void>} Resolves once the prediction has been received and logged.
 */
async function main(
  project,
  model = 'text-embedding-004',
  texts = 'banana bread?;banana muffins?',
  task = 'QUESTION_ANSWERING',
  outputDimensionality = 0,
  apiEndpoint = 'us-central1-aiplatform.googleapis.com'
) {
  const aiplatform = require('@google-cloud/aiplatform');
  const {PredictionServiceClient} = aiplatform.v1;
  const {helpers} = aiplatform; // helps construct protobuf.Value objects.
  const clientOptions = {apiEndpoint: apiEndpoint};

  // Keep the resource path in the same region as the host being called, so that passing a
  // non-default apiEndpoint does not produce a mismatched location.
  const locationMatch = /^(\w+-\w+)/.exec(apiEndpoint);
  const location = locationMatch ? locationMatch[1] : 'us-central1';
  const endpoint = `projects/${project}/locations/${location}/publishers/google/models/${model}`;

  // Parameters are sent as a struct keyed by name, not as a bare number.
  // Number() also covers the value arriving as a string (e.g. from a command line).
  const requestedDimensionality = Number(outputDimensionality);
  const parameters = helpers.toValue({
    outputDimensionality:
      requestedDimensionality > 0 ? requestedDimensionality : 256,
  });

  async function callPredict() {
    const instances = texts
      .split(';')
      .map(e => helpers.toValue({content: e, taskType: task}));
    const request = {endpoint, instances, parameters};
    const client = new PredictionServiceClient(clientOptions);
    const [response] = await client.predict(request);
    console.log('Got predict response');
    const predictions = response.predictions;
    for (const prediction of predictions) {
      // Each prediction is read as a struct: embeddings -> values -> list of numbers.
      const embeddings = prediction.structValue.fields.embeddings;
      const values = embeddings.structValue.fields.values.listValue.values;
      console.log('Got prediction: ' + JSON.stringify(values));
    }
  }

  // Await so that main() settles only after the request finishes and any failure rejects
  // main()'s promise instead of becoming an unhandled rejection.
  await callPredict();
}
Python
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Python 设置说明执行操作。如需了解详情，请参阅 Vertex AI Python API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
from typing import List, Optional
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
def embed_text(
    texts: List[str] = ["banana muffins? ", "banana bread? banana muffins?"],
    task: str = "RETRIEVAL_DOCUMENT",
    model_name: str = "text-embedding-004",
    dimensionality: Optional[int] = 256,
) -> List[List[float]]:
    """Embeds texts with a pre-trained, foundational model.

    Args:
        texts: Texts to embed; one embedding input is built per entry.
        task: Task type passed to every embedding input.
        model_name: Name of the pretrained embedding model to load.
        dimensionality: Requested output dimensionality. When falsy (``None`` or ``0``)
            the argument is not forwarded to the model at all.

    Returns:
        The ``values`` of each embedding returned by the model, in the order returned.
    """
    embedding_model = TextEmbeddingModel.from_pretrained(model_name)

    requests = []
    for text in texts:
        requests.append(TextEmbeddingInput(text, task))

    # Only pass output_dimensionality when a truthy value was requested.
    if dimensionality:
        results = embedding_model.get_embeddings(
            requests, output_dimensionality=dimensionality
        )
    else:
        results = embedding_model.get_embeddings(requests)

    vectors = []
    for result in results:
        vectors.append(result.values)
    return vectors
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例，请参阅 Google Cloud 示例浏览器。