Starting April 29, 2025, the Gemini 1.5 Pro and Gemini 1.5 Flash models will not be available in projects that have not used them before, including new projects. For details, see
Model versions and lifecycle.
# Get video embeddings from a video segment
This code sample demonstrates how to extract video embeddings from a specific segment of a video using the multimodal embeddings model.
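Both samples below build a single prediction instance that points at a video in Cloud Storage and limits processing to a time range through `videoSegmentConfig`. The following is an illustrative sketch only, shown as a plain Python dict; it is not sent anywhere by itself, and the field names follow the request body schema linked from the Go sample:

    # Illustrative sketch of the prediction instance built by the samples below.
    # Field names follow the multimodal embeddings API request body schema.
    instance = {
        "video": {
            # The video can alternatively be passed inline as base64 bytes in "bytesBase64Encoded".
            "gcsUri": "gs://cloud-samples-data/vertex-ai-vision/highway_vehicles.mp4",
            "videoSegmentConfig": {
                "startOffsetSec": 1,  # start of the segment to embed, in seconds
                "endOffsetSec": 5,    # end of the segment to embed, in seconds
            },
        },
    }
    print(instance)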
Explore further
---------------

For detailed documentation that includes this code sample, see the following:

- [Get multimodal embeddings](/vertex-ai/generative-ai/docs/embeddings/get-multimodal-embeddings)
- [Multimodal embeddings API](/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api)

Code sample
-----------
[[["Fácil de entender","easyToUnderstand","thumb-up"],["Meu problema foi resolvido","solvedMyProblem","thumb-up"],["Outro","otherUp","thumb-up"]],[["Difícil de entender","hardToUnderstand","thumb-down"],["Informações incorretas ou exemplo de código","incorrectInformationOrSampleCode","thumb-down"],["Não contém as informações/amostras de que eu preciso","missingTheInformationSamplesINeed","thumb-down"],["Problema na tradução","translationIssue","thumb-down"],["Outro","otherDown","thumb-down"]],[],[],[],null,["# Get video embeddings from a video segment\n\nThis code sample demonstrates how to extract video embeddings from a specific segment of a video using the MultiModal Embedding model.\n\nExplore further\n---------------\n\n\nFor detailed documentation that includes this code sample, see the following:\n\n- [Get multimodal embeddings](/vertex-ai/generative-ai/docs/embeddings/get-multimodal-embeddings)\n- [Multimodal embeddings API](/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api)\n\nCode sample\n-----------\n\n### Go\n\n\nBefore trying this sample, follow the Go setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Go API\nreference documentation](/go/docs/reference/cloud.google.com/go/aiplatform/latest/apiv1).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import (\n \t\"context\"\n \t\"encoding/json\"\n \t\"fmt\"\n \t\"io\"\n \t\"time\"\n\n \taiplatform \"cloud.google.com/go/aiplatform/apiv1beta1\"\n \taiplatformpb \"cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb\"\n \t\"google.golang.org/api/option\"\n \t\"google.golang.org/protobuf/encoding/protojson\"\n \t\"google.golang.org/protobuf/types/known/structpb\"\n )\n\n // generateForVideo shows how to use the multimodal model to generate embeddings for video input.\n func generateForVideo(w io.Writer, project, location string) error {\n \t// location = \"us-central1\"\n\n \t// The default context timeout may be not enough to process a video input.\n \tctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)\n \tdefer cancel()\n\n \tapiEndpoint := fmt.Sprintf(\"%s-aiplatform.googleapis.com:443\", location)\n \tclient, err := aiplatform.https://cloud.google.com/go/docs/reference/cloud.google.com/go/aiplatform/latest/apiv1beta1.html#cloud_google_com_go_aiplatform_apiv1beta1_PredictionClient_NewPredictionClient(ctx, option.WithEndpoint(apiEndpoint))\n \tif err != nil {\n \t\treturn fmt.Errorf(\"failed to construct API client: %w\", err)\n \t}\n \tdefer client.Close()\n\n \tmodel := \"multimodalembedding@001\"\n \tendpoint := fmt.Sprintf(\"projects/%s/locations/%s/publishers/google/models/%s\", project, location, model)\n\n \t// This is the input to the model's prediction call. 
For schema, see:\n \t// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#request_body\n \tinstances, err := structpb.NewValue(map[string]any{\n \t\t\"video\": map[string]any{\n \t\t\t// Video input can be provided either as a Google Cloud Storage URI or as base64-encoded\n \t\t\t// bytes using the \"bytesBase64Encoded\" field.\n \t\t\t\"gcsUri\": \"gs://cloud-samples-data/vertex-ai-vision/highway_vehicles.mp4\",\n \t\t\t\"videoSegmentConfig\": map[string]any{\n \t\t\t\t\"startOffsetSec\": 1,\n \t\t\t\t\"endOffsetSec\": 5,\n \t\t\t},\n \t\t},\n \t})\n \tif err != nil {\n \t\treturn fmt.Errorf(\"failed to construct request payload: %w\", err)\n \t}\n\n \treq := &aiplatformpb.PredictRequest{\n \t\tEndpoint: endpoint,\n \t\t// The model supports only 1 instance per request.\n \t\tInstances: []*structpb.Value{instances},\n \t}\n \tresp, err := client.Predict(ctx, req)\n \tif err != nil {\n \t\treturn fmt.Errorf(\"failed to generate embeddings: %w\", err)\n \t}\n\n \tinstanceEmbeddingsJson, err := protojson.Marshal(resp.GetPredictions()[0])\n \tif err != nil {\n \t\treturn fmt.Errorf(\"failed to convert protobuf value to JSON: %w\", err)\n \t}\n \t// For response schema, see:\n \t// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#response-body\n \tvar instanceEmbeddings struct {\n \t\tVideoEmbeddings []struct {\n \t\t\tEmbedding []float32 `json:\"embedding\"`\n \t\t\tStartOffsetSec float64 `json:\"startOffsetSec\"`\n \t\t\tEndOffsetSec float64 `json:\"endOffsetSec\"`\n \t\t} `json:\"videoEmbeddings\"`\n \t}\n \tif err := json.Unmarshal(instanceEmbeddingsJson, &instanceEmbeddings); err != nil {\n \t\treturn fmt.Errorf(\"failed to unmarshal json: %w\", err)\n \t}\n \t// Get the embedding for our single video segment (`.videoEmbeddings` object has one entry per\n \t// each processed segment).\n \tvideoEmbedding := instanceEmbeddings.VideoEmbeddings[0]\n\n \tfmt.Fprintf(w, \"Video embedding (seconds: %.f-%.f; length=%d): %v\\n\",\n \t\tvideoEmbedding.StartOffsetSec,\n \t\tvideoEmbedding.EndOffsetSec,\n \t\tlen(videoEmbedding.Embedding),\n \t\tvideoEmbedding.Embedding,\n \t)\n \t// Example response:\n \t// Video embedding (seconds: 1-5; length=1408): [-0.016427778 0.032878537 -0.030755188 ... 
]\n\n \treturn nil\n }\n\n### Python\n\n\nBefore trying this sample, follow the Python setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Python API\nreference documentation](/python/docs/reference/aiplatform/latest).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import https://cloud.google.com/python/docs/reference/vertexai/latest/\n\n from vertexai.vision_models import https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.MultiModalEmbeddingModel.html, https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.Video.html\n from vertexai.vision_models import https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.VideoSegmentConfig.html\n\n # TODO(developer): Update & uncomment line below\n # PROJECT_ID = \"your-project-id\"\n https://cloud.google.com/python/docs/reference/vertexai/latest/.init(project=PROJECT_ID, location=\"us-central1\")\n\n model = https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.MultiModalEmbeddingModel.html.https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.MultiModalEmbeddingModel.html#vertexai_preview_vision_models_MultiModalEmbeddingModel_from_pretrained(\"multimodalembedding@001\")\n\n embeddings = model.https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.MultiModalEmbeddingModel.html#vertexai_preview_vision_models_MultiModalEmbeddingModel_get_embeddings(\n video=https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.Video.html.https://cloud.google.com/python/docs/reference/vertexai/latest/vertexai.preview.vision_models.Video.html#vertexai_preview_vision_models_Video_load_from_file(\n \"gs://cloud-samples-data/vertex-ai-vision/highway_vehicles.mp4\"\n ),\n video_segment_config=VideoSegmentConfig(end_offset_sec=1),\n )\n\n # Video Embeddings are segmented based on the video_segment_config.\n print(\"Video Embeddings:\")\n for video_embedding in embeddings.video_embeddings:\n print(\n f\"Video Segment: {video_embedding.start_offset_sec} - {video_embedding.end_offset_sec}\"\n )\n print(f\"Embedding: {video_embedding.embedding}\")\n\n # Example response:\n # Video Embeddings:\n # Video Segment: 0.0 - 1.0\n # Embedding: [-0.0206376351, 0.0123456789, ...]\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=generativeaionvertexai)."]]
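Finally, an illustrative (unofficial) aside: beyond printing the raw vectors, a common follow-up is to compare a video segment embedding with a text embedding produced by the same model. The sketch below assumes the `vertexai` SDK and `numpy` are installed, and relies on `get_embeddings` also accepting a `contextual_text` argument and returning a `text_embedding`, as described in the multimodal embeddings API documentation.

    import numpy as np
    import vertexai
    from vertexai.vision_models import MultiModalEmbeddingModel, Video, VideoSegmentConfig

    # TODO(developer): Update & uncomment line below
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")

    # Request a text embedding alongside the video segment embedding in a single call.
    embeddings = model.get_embeddings(
        video=Video.load_from_file(
            "gs://cloud-samples-data/vertex-ai-vision/highway_vehicles.mp4"
        ),
        video_segment_config=VideoSegmentConfig(start_offset_sec=1, end_offset_sec=5),
        contextual_text="cars driving on a highway",
    )

    text_vector = np.array(embeddings.text_embedding)
    for video_embedding in embeddings.video_embeddings:
        segment_vector = np.array(video_embedding.embedding)
        # Cosine similarity between the text and the video segment
        # (both vectors are 1408-dimensional by default).
        similarity = float(
            np.dot(text_vector, segment_vector)
            / (np.linalg.norm(text_vector) * np.linalg.norm(segment_vector))
        )
        print(
            f"Segment {video_embedding.start_offset_sec}-{video_embedding.end_offset_sec}s: "
            f"cosine similarity = {similarity:.4f}"
        )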