Method: projects.locations.endpoints.streamGenerateContent

Generate content with multimodal inputs with streaming support.

Endpoint

post https://{endpoint}/v1beta1/{model}:streamGenerateContent

Where {service-endpoint} is one of the supported service endpoints.

Path parameters

model string

Required. The fully qualified name of the publisher model or tuned model endpoint to use.

Publisher model format: projects/{project}/locations/{location}/publishers/*/models/*

Tuned model endpoint format: projects/{project}/locations/{location}/endpoints/{endpoint}

Request body

The request body contains data with the following structure:

Fields
contents[] object (Content)

Required. The content of the current conversation with the model.

For single-turn queries, this is a single instance. For multi-turn queries, this is a repeated field that contains conversation history + latest request.

cachedContent string

Optional. The name of the cached content used as context to serve the prediction. Note: only used in explicit caching, where users can have control over caching (e.g. what content to cache) and enjoy guaranteed cost savings. Format: projects/{project}/locations/{location}/cachedContents/{cachedContent}

tools[] object (Tool)

Optional. A list of Tools the model may use to generate the next response.

A Tool is a piece of code that enables the system to interact with external systems to perform an action, or set of actions, outside of knowledge and scope of the model.

toolConfig object (ToolConfig)

Optional. Tool config. This config is shared for all tools provided in the request.

labels map (key: string, value: string)

Optional. The labels with user-defined metadata for the request. It is used for billing and reporting only.

label keys and values can be no longer than 63 characters (Unicode codepoints) and can only contain lowercase letters, numeric characters, underscores, and dashes. International characters are allowed. label values are optional. label keys must start with a letter.

safetySettings[] object (SafetySetting)

Optional. Per request settings for blocking unsafe content. Enforced on GenerateContentResponse.candidates.

generationConfig object (GenerationConfig)

Optional. Generation config.

systemInstruction object (Content)

Optional. The user provided system instructions for the model. Note: only text should be used in parts and content in each part will be in a separate paragraph.

Example request

Text

Go

import (
	"context"
	"errors"
	"fmt"
	"io"

	"cloud.google.com/go/vertexai/genai"
	"google.golang.org/api/iterator"
)

// generateContent shows how to	send a basic streaming text prompt, writing
// the response to the provided io.Writer.
func generateContent(w io.Writer, projectID, modelName string) error {
	ctx := context.Background()

	client, err := genai.NewClient(ctx, projectID, "us-central1")
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	iter := model.GenerateContentStream(
		ctx,
		genai.Text("Write a story about a magic backpack."),
	)
	for {
		resp, err := iter.Next()
		if err == iterator.Done {
			return nil
		}
		if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 {
			return errors.New("empty response from model")
		}
		if err != nil {
			return err
		}
		fmt.Fprint(w, "generated response: ")
		for _, c := range resp.Candidates {
			for _, p := range c.Content.Parts {
				fmt.Fprintf(w, "%s ", p)
			}
		}
	}
}

Java

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.generativeai.GenerativeModel;

public class StreamingQuestionAnswer {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-google-cloud-project-id";
    String location = "us-central1";
    String modelName = "gemini-1.5-flash-001";

    streamingQuestion(projectId, location, modelName);
  }

  // Ask a simple question and get the response via streaming.
  public static void streamingQuestion(String projectId, String location, String modelName)
      throws Exception {
    // Initialize client that will be used to send requests.
    // This client only needs to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      GenerativeModel model = new GenerativeModel(modelName, vertexAI);

      // Stream the result.
      model.generateContentStream("Write a story about a magic backpack.")
          .stream()
          .forEach(System.out::println);

      System.out.println("Streaming complete.");
    }
  }
}

Node.js

const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
const PROJECT_ID = process.env.CAIP_PROJECT_ID;
const LOCATION = process.env.LOCATION;
const MODEL = 'gemini-1.5-flash-001';

async function generateContent() {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: PROJECT_ID, location: LOCATION});

  // Instantiate the model
  const generativeModel = vertexAI.getGenerativeModel({
    model: MODEL,
  });

  const request = {
    contents: [
      {
        role: 'user',
        parts: [
          {
            text: 'Write a story about a magic backpack.',
          },
        ],
      },
    ],
  };

  console.log(JSON.stringify(request));

  const result = await generativeModel.generateContentStream(request);
  for await (const item of result.stream) {
    console.log(item.candidates[0].content.parts[0].text);
  }
}

Python

import vertexai

from vertexai.generative_models import GenerativeModel

# TODO(developer): Update Project ID
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-002")
responses = model.generate_content(
    "Write a story about a magic backpack.", stream=True
)

for response in responses:
    print(response.text)

Multi-modal

Go

import (
	"context"
	"errors"
	"fmt"
	"io"

	"cloud.google.com/go/vertexai/genai"
	"google.golang.org/api/iterator"
)

func generateContent(w io.Writer, projectID, modelName string) error {
	ctx := context.Background()

	client, err := genai.NewClient(ctx, projectID, "us-central1")
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)
	iter := model.GenerateContentStream(
		ctx,
		genai.FileData{
			MIMEType: "video/mp4",
			FileURI:  "gs://cloud-samples-data/generative-ai/video/animals.mp4",
		},
		genai.FileData{
			MIMEType: "video/jpeg",
			FileURI:  "gs://cloud-samples-data/generative-ai/image/character.jpg",
		},
		genai.Text("Are these video and image correlated?"),
	)
	for {
		resp, err := iter.Next()
		if err == iterator.Done {
			return nil
		}
		if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 {
			return errors.New("empty response from model")
		}
		if err != nil {
			return err
		}

		fmt.Fprint(w, "generated response: ")
		for _, c := range resp.Candidates {
			for _, p := range c.Content.Parts {
				fmt.Fprintf(w, "%s ", p)
			}
		}
		fmt.Fprint(w, "\n")
	}
}

Java

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.generativeai.ContentMaker;
import com.google.cloud.vertexai.generativeai.GenerativeModel;
import com.google.cloud.vertexai.generativeai.PartMaker;

public class StreamingMultimodal {
  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-google-cloud-project-id";
    String location = "us-central1";
    String modelName = "gemini-1.5-flash-001";

    streamingMultimodal(projectId, location, modelName);
  }

  // Ask a simple question and get the response via streaming.
  public static void streamingMultimodal(String projectId, String location, String modelName)
      throws Exception {
    // Initialize client that will be used to send requests.
    // This client only needs to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      GenerativeModel model = new GenerativeModel(modelName, vertexAI);

      String videoUri = "gs://cloud-samples-data/video/animals.mp4";
      String imgUri = "gs://cloud-samples-data/generative-ai/image/character.jpg";

      // Stream the result.
      model.generateContentStream(
          ContentMaker.fromMultiModalData(
              PartMaker.fromMimeTypeAndData("video/mp4", videoUri),
              PartMaker.fromMimeTypeAndData("image/jpeg", imgUri),
              "Are this video and image correlated?"
          ))
          .stream()
          .forEach(System.out::println);
    }
  }
}

Node.js

const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
const PROJECT_ID = process.env.CAIP_PROJECT_ID;
const LOCATION = process.env.LOCATION;
const MODEL = 'gemini-1.5-flash-001';

async function generateContent() {
  // Initialize Vertex AI
  const vertexAI = new VertexAI({project: PROJECT_ID, location: LOCATION});
  const generativeModel = vertexAI.getGenerativeModel({model: MODEL});

  const request = {
    contents: [
      {
        role: 'user',
        parts: [
          {
            file_data: {
              file_uri: 'gs://cloud-samples-data/video/animals.mp4',
              mime_type: 'video/mp4',
            },
          },
          {
            file_data: {
              file_uri:
                'gs://cloud-samples-data/generative-ai/image/character.jpg',
              mime_type: 'image/jpeg',
            },
          },
          {text: 'Are this video and image correlated?'},
        ],
      },
    ],
  };

  const result = await generativeModel.generateContentStream(request);

  for await (const item of result.stream) {
    console.log(item.candidates[0].content.parts[0].text);
  }
}

Python

import vertexai

from vertexai.generative_models import GenerativeModel, Part

# TODO(developer): Update & un-comment the lines below
# PROJECT_ID = "your-project-id"

vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-002")
responses = model.generate_content(
    [
        Part.from_uri(
            "gs://cloud-samples-data/generative-ai/video/animals.mp4", "video/mp4"
        ),
        Part.from_uri(
            "gs://cloud-samples-data/generative-ai/image/character.jpg",
            "image/jpeg",
        ),
        "Are these video and image correlated?",
    ],
    stream=True,
)

for response in responses:
    print(response)

Response body

If successful, the response body contains a stream of GenerateContentResponse instances.