Membuat teks dari prompt multimodal

Contoh ini menunjukkan cara membuat teks dari perintah multimodal menggunakan model Gemini. Perintah terdiri dari tiga gambar dan dua perintah teks. Model menghasilkan respons teks yang mendeskripsikan gambar dan perintah teks.

Contoh kode

C#

Sebelum mencoba contoh ini, ikuti petunjuk penyiapan C# di Panduan memulai Vertex AI menggunakan library klien. Untuk mengetahui informasi selengkapnya, lihat Dokumentasi referensi API C# Vertex AI.

Untuk melakukan autentikasi ke Vertex AI, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, lihat Menyiapkan autentikasi untuk lingkungan pengembangan lokal.


using Google.Api.Gax.Grpc;
using Google.Cloud.AIPlatform.V1;
using Google.Protobuf;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

public class MultimodalMultiImage
{
    public async Task<string> GenerateContent(
        string projectId = "your-project-id",
        string location = "us-central1",
        string publisher = "google",
        string model = "gemini-2.0-flash-001"
    )
    {
        var predictionServiceClient = new PredictionServiceClientBuilder
        {
            Endpoint = $"{location}-aiplatform.googleapis.com"
        }.Build();

        ByteString colosseum = await ReadImageFileAsync(
            "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png");

        ByteString forbiddenCity = await ReadImageFileAsync(
            "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark2.png");

        ByteString christRedeemer = await ReadImageFileAsync(
            "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark3.png");

        var generateContentRequest = new GenerateContentRequest
        {
            Model = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
            Contents =
            {
                new Content
                {
                    Role = "USER",
                    Parts =
                    {
                        new Part { InlineData = new() { MimeType = "image/png", Data = colosseum }},
                        new Part { Text = "city: Rome, Landmark: the Colosseum" },
                        new Part { InlineData = new() { MimeType = "image/png", Data = forbiddenCity }},
                        new Part { Text = "city: Beijing, Landmark: Forbidden City"},
                        new Part { InlineData = new() { MimeType = "image/png", Data = christRedeemer }}
                    }
                }
            }
        };

        using PredictionServiceClient.StreamGenerateContentStream response = predictionServiceClient.StreamGenerateContent(generateContentRequest);

        StringBuilder fullText = new();

        AsyncResponseStream<GenerateContentResponse> responseStream = response.GetResponseStream();
        await foreach (GenerateContentResponse responseItem in responseStream)
        {
            fullText.Append(responseItem.Candidates[0].Content.Parts[0].Text);
        }
        return fullText.ToString();
    }

    private static async Task<ByteString> ReadImageFileAsync(string url)
    {
        using HttpClient client = new();
        using var response = await client.GetAsync(url);
        byte[] imageBytes = await response.Content.ReadAsByteArrayAsync();
        return ByteString.CopyFrom(imageBytes);
    }
}

Node.js

Sebelum mencoba contoh ini, ikuti petunjuk penyiapan Node.js di Panduan memulai Vertex AI menggunakan library klien. Untuk mengetahui informasi selengkapnya, lihat Dokumentasi referensi API Node.js Vertex AI.

Untuk melakukan autentikasi ke Vertex AI, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, lihat Menyiapkan autentikasi untuk lingkungan pengembangan lokal.

const {VertexAI} = require('@google-cloud/vertexai');
const axios = require('axios');

async function getBase64(url) {
  const image = await axios.get(url, {responseType: 'arraybuffer'});
  return Buffer.from(image.data).toString('base64');
}

/**
 * TODO(developer): Update these variables before running the sample.
 */
async function sendMultiModalPromptWithImage(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-2.0-flash-001'
) {
  // For images, the SDK supports base64 strings
  const landmarkImage1 = await getBase64(
    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png'
  );
  const landmarkImage2 = await getBase64(
    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark2.png'
  );
  const landmarkImage3 = await getBase64(
    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark3.png'
  );

  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  const generativeVisionModel = vertexAI.getGenerativeModel({
    model: model,
  });

  // Pass multimodal prompt
  const request = {
    contents: [
      {
        role: 'user',
        parts: [
          {
            inlineData: {
              data: landmarkImage1,
              mimeType: 'image/png',
            },
          },
          {
            text: 'city: Rome, Landmark: the Colosseum',
          },

          {
            inlineData: {
              data: landmarkImage2,
              mimeType: 'image/png',
            },
          },
          {
            text: 'city: Beijing, Landmark: Forbidden City',
          },
          {
            inlineData: {
              data: landmarkImage3,
              mimeType: 'image/png',
            },
          },
        ],
      },
    ],
  };

  // Create the response
  const response = await generativeVisionModel.generateContent(request);
  // Wait for the response to complete
  const aggregatedResponse = await response.response;
  // Select the text from the response
  const fullTextResponse =
    aggregatedResponse.candidates[0].content.parts[0].text;

  console.log(fullTextResponse);
}

Langkah berikutnya

Untuk menelusuri dan memfilter contoh kode untuk produk Google Cloud lainnya, lihat Google Cloud browser contoh.