使用 Gemini 處理 PDF 檔案

這個範例說明如何使用 Gemini 處理 PDF 文件。

深入探索

如需包含這個程式碼範例的詳細說明文件,請參閱下列內容:

程式碼範例

Go

在試用這個範例之前,請先按照Go使用用戶端程式庫的 Vertex AI 快速入門中的操作說明進行設定。 詳情請參閱 Vertex AI Go API 參考說明文件

如要向 Vertex AI 進行驗證,請設定應用程式預設憑證。 詳情請參閱「為本機開發環境設定驗證」。

import (
	"context"
	"fmt"
	"io"

	"google.golang.org/genai"
)

// generateTextWithPDF shows how to generate text using a PDF file input.
func generateTextWithPDF(w io.Writer) error {
	ctx := context.Background()

	client, err := genai.NewClient(ctx, &genai.ClientConfig{
		HTTPOptions: genai.HTTPOptions{APIVersion: "v1"},
	})
	if err != nil {
		return fmt.Errorf("failed to create genai client: %w", err)
	}

	modelName := "gemini-2.5-flash"
	contents := []*genai.Content{
		{Parts: []*genai.Part{
			{Text: `You are a highly skilled document summarization specialist.
	Your task is to provide a concise executive summary of no more than 300 words.
	Please summarize the given document for a general audience.`},
			{FileData: &genai.FileData{
				FileURI:  "gs://cloud-samples-data/generative-ai/pdf/1706.03762v7.pdf",
				MIMEType: "application/pdf",
			}},
		},
			Role: "user"},
	}

	resp, err := client.Models.GenerateContent(ctx, modelName, contents, nil)
	if err != nil {
		return fmt.Errorf("failed to generate content: %w", err)
	}

	respText := resp.Text()

	fmt.Fprintln(w, respText)

	// Example response:
	// "Attention Is All You Need" introduces the Transformer,
	// a groundbreaking neural network architecture designed for...
	// ...

	return nil
}

Java

在試用這個範例之前,請先按照Java使用用戶端程式庫的 Vertex AI 快速入門中的操作說明進行設定。 詳情請參閱 Vertex AI Java API 參考說明文件

如要向 Vertex AI 進行驗證,請設定應用程式預設憑證。 詳情請參閱「為本機開發環境設定驗證」。


import com.google.genai.Client;
import com.google.genai.types.Content;
import com.google.genai.types.GenerateContentResponse;
import com.google.genai.types.HttpOptions;
import com.google.genai.types.Part;

public class TextGenerationWithPdf {

  public static void main(String[] args) {
    // TODO(developer): Replace these variables before running the sample.
    String modelId = "gemini-2.5-flash";
    generateContent(modelId);
  }

  // Generates text with PDF file input
  public static String generateContent(String modelId) {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    try (Client client =
        Client.builder()
            .location("global")
            .vertexAI(true)
            .httpOptions(HttpOptions.builder().apiVersion("v1").build())
            .build()) {

      String prompt =
          "You are a highly skilled document summarization specialist.\n"
              + " Your task is to provide a concise executive summary of no more than 300 words.\n"
              + " Please summarize the given document for a general audience";

      GenerateContentResponse response =
          client.models.generateContent(
              modelId,
              Content.fromParts(
                  Part.fromUri(
                      "gs://cloud-samples-data/generative-ai/pdf/1706.03762v7.pdf",
                      "application/pdf"),
                  Part.fromText(prompt)),
              null);

      System.out.print(response.text());
      // Example response:
      // The document introduces the Transformer, a novel neural network architecture designed for
      // sequence transduction tasks, such as machine translation. Unlike previous dominant models
      // that rely on complex recurrent or convolutional neural networks, the Transformer proposes a
      // simpler, more parallelizable design based *solely* on attention mechanisms, entirely
      // dispensing with recurrence and convolutions...

      return response.text();
    }
  }
}

Node.js

在試用這個範例之前,請先按照Node.js使用用戶端程式庫的 Vertex AI 快速入門中的操作說明進行設定。 詳情請參閱 Vertex AI Node.js API 參考說明文件

如要向 Vertex AI 進行驗證,請設定應用程式預設憑證。 詳情請參閱「為本機開發環境設定驗證」。

const {GoogleGenAI} = require('@google/genai');

const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';

async function generateText(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const prompt = `You are a highly skilled document summarization specialist.
    Your task is to provide a concise executive summary of no more than 300 words.
    Please summarize the given document for a general audience.`;

  const pdfFile = {
    fileData: {
      fileUri: 'gs://cloud-samples-data/generative-ai/pdf/1706.03762v7.pdf',
      mimeType: 'application/pdf',
    },
  };

  const response = await client.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [pdfFile, prompt],
  });

  console.log(response.text);

  // Example response:
  //  Here is a summary of the document in 300 words.
  //  The paper introduces the Transformer, a novel neural network architecture for
  //  sequence transduction tasks like machine translation. Unlike existing models that rely on recurrent or
  //  convolutional layers, the Transformer is based entirely on attention mechanisms.
  //  ...

  return response.text;
}

Python

在試用這個範例之前,請先按照Python使用用戶端程式庫的 Vertex AI 快速入門中的操作說明進行設定。 詳情請參閱 Vertex AI Python API 參考說明文件

如要向 Vertex AI 進行驗證,請設定應用程式預設憑證。 詳情請參閱「為本機開發環境設定驗證」。

from google import genai
from google.genai.types import HttpOptions, Part

client = genai.Client(http_options=HttpOptions(api_version="v1"))
model_id = "gemini-2.5-flash"

prompt = """
You are a highly skilled document summarization specialist.
Your task is to provide a concise executive summary of no more than 300 words.
Please summarize the given document for a general audience.
"""

pdf_file = Part.from_uri(
    file_uri="gs://cloud-samples-data/generative-ai/pdf/1706.03762v7.pdf",
    mime_type="application/pdf",
)

response = client.models.generate_content(
    model=model_id,
    contents=[pdf_file, prompt],
)

print(response.text)
# Example response:
# Here is a summary of the document in 300 words.
#
# The paper introduces the Transformer, a novel neural network architecture for
# sequence transduction tasks like machine translation. Unlike existing models that rely on recurrent or
# convolutional layers, the Transformer is based entirely on attention mechanisms.
# ...

後續步驟

如要搜尋及篩選其他 Google Cloud 產品的程式碼範例,請參閱Google Cloud 範例瀏覽器