List objects in an image in JSON format

Output JSON formatted text that list objects that the model can identify from a given image.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

Before trying this sample, follow the C# setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI C# API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

public async Task<string> GenerateContentWithResponseSchema6(
    string projectId = "your-project-id",
    string location = "us-central1",
    string publisher = "google",
    string model = "gemini-1.5-pro-001")
{

    var predictionServiceClient = new PredictionServiceClientBuilder
    {
        Endpoint = $"{location}-aiplatform.googleapis.com"
    }.Build();

    var responseSchema = new OpenApiSchema
    {
        Type = Type.Object,
        Properties =
        {
            ["playlist"] = new()
            {
                Type = Type.Array,
                Items = new()
                {
                    Type = Type.Object,
                    Properties =
                    {
                        ["artist"] = new() { Type = Type.String },
                        ["song"] = new() { Type = Type.String },
                        ["era"] = new() { Type = Type.String },
                        ["released"] = new() { Type = Type.Integer }
                    }
                }
            },
            ["time_start"] = new() { Type = Type.String }
        }
    };

    string prompt = @"
    We have two friends of the host who have requested a few songs for us to play. We're going to start this playlist at 8:15.
    They'll want to hear Black Hole Sun by Soundgarden because their son was born in 1994. They will also want Loser by Beck
    coming right after which is a funny choice considering it's also the same year as their son was born, but that's probably
    just a coincidence. Add Take On Me from A-ha to the list since they were married when the song released in 1985. Their final
    request is Sweet Child O' Mine by Guns N Roses, which I think came out in 1987 when they both finished university.
    Thank you, this party should be great!";

    var generateContentRequest = new GenerateContentRequest
    {
        Model = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
        Contents =
        {
            new Content
            {
                Role = "USER",
                Parts =
                {
                    new Part { Text = prompt }
                }
            }
        },
        GenerationConfig = new GenerationConfig
        {
            ResponseMimeType = "application/json",
            ResponseSchema = responseSchema
        },
    };

    GenerateContentResponse response = await predictionServiceClient.GenerateContentAsync(generateContentRequest);

    string responseText = response.Candidates[0].Content.Parts[0].Text;
    Console.WriteLine(responseText);

    return responseText;
}

Go

Before trying this sample, follow the Go setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Go API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"errors"
	"fmt"
	"io"

	"cloud.google.com/go/vertexai/genai"
)

// controlledGenerationResponseSchema6 shows how to make sure the generated output
// will always be valid JSON and adhere to a specific schema.
func controlledGenerationResponseSchema6(w io.Writer, projectID, location, modelName string) error {
	// location := "us-central1"
	// modelName := "gemini-1.5-pro-001"
	ctx := context.Background()
	client, err := genai.NewClient(ctx, projectID, location)
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	model.GenerationConfig.ResponseMIMEType = "application/json"

	// Build an OpenAPI schema, in memory
	model.GenerationConfig.ResponseSchema = &genai.Schema{
		Type: genai.TypeArray,
		Items: &genai.Schema{
			Type: genai.TypeArray,
			Items: &genai.Schema{
				Type: genai.TypeObject,
				Properties: map[string]*genai.Schema{
					"object": {
						Type: genai.TypeString,
					},
				},
			},
		},
	}

	// These images in Cloud Storage are viewable at
	// https://storage.googleapis.com/cloud-samples-data/generative-ai/image/office-desk.jpeg
	// https://storage.googleapis.com/cloud-samples-data/generative-ai/image/gardening-tools.jpeg

	img1 := genai.FileData{
		MIMEType: "image/jpeg",
		FileURI:  "gs://cloud-samples-data/generative-ai/image/office-desk.jpeg",
	}

	img2 := genai.FileData{
		MIMEType: "image/jpeg",
		FileURI:  "gs://cloud-samples-data/generative-ai/image/gardening-tools.jpeg",
	}

	prompt := "Generate a list of objects in the images."

	res, err := model.GenerateContent(ctx, img1, img2, genai.Text(prompt))
	if err != nil {
		return fmt.Errorf("unable to generate contents: %v", err)
	}

	if len(res.Candidates) == 0 ||
		len(res.Candidates[0].Content.Parts) == 0 {
		return errors.New("empty response from model")
	}

	fmt.Fprint(w, res.Candidates[0].Content.Parts[0])
	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Java API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.api.GenerateContentResponse;
import com.google.cloud.vertexai.api.GenerationConfig;
import com.google.cloud.vertexai.api.Schema;
import com.google.cloud.vertexai.api.Type;
import com.google.cloud.vertexai.generativeai.ContentMaker;
import com.google.cloud.vertexai.generativeai.GenerativeModel;
import com.google.cloud.vertexai.generativeai.PartMaker;
import com.google.cloud.vertexai.generativeai.ResponseHandler;
import java.io.IOException;

public class ControlledGenerationSchema6 {
  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "genai-java-demos";
    String location = "us-central1";
    String modelName = "gemini-1.5-pro-001";

    controlGenerationWithJsonSchema6(projectId, location, modelName);
  }

  // Generate responses that are always valid JSON and comply with a JSON schema
  public static String controlGenerationWithJsonSchema6(
      String projectId, String location, String modelName)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs
    // to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      GenerationConfig generationConfig = GenerationConfig.newBuilder()
          .setResponseMimeType("application/json")
          .setResponseSchema(Schema.newBuilder()
              .setType(Type.ARRAY)
              .setItems(Schema.newBuilder()
                  .setType(Type.OBJECT)
                  .putProperties("object", Schema.newBuilder().setType(Type.STRING).build())
                  .build())
              .build())
          .build();

      GenerativeModel model = new GenerativeModel(modelName, vertexAI)
          .withGenerationConfig(generationConfig);

      // These images in Cloud Storage are viewable at
      // https://storage.googleapis.com/cloud-samples-data/generative-ai/image/office-desk.jpeg
      // https://storage.googleapis.com/cloud-samples-data/generative-ai/image/gardening-tools.jpeg

      GenerateContentResponse response = model.generateContent(
          ContentMaker.fromMultiModalData(
              PartMaker.fromMimeTypeAndData("image/jpeg",
                  "gs://cloud-samples-data/generative-ai/image/office-desk.jpeg"),
              PartMaker.fromMimeTypeAndData("image/jpeg",
                  "gs://cloud-samples-data/generative-ai/image/gardening-tools.jpeg"),
              "Generate a list of objects in the images."
          )
      );

      String output = ResponseHandler.getText(response);
      System.out.println(output);
      return output;
    }
  }
}

Python

Before trying this sample, follow the Python setup instructions in the Vertex AI quickstart using client libraries. For more information, see the Vertex AI Python API reference documentation.

To authenticate to Vertex AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import vertexai

from vertexai.generative_models import GenerationConfig, GenerativeModel, Part

# TODO(developer): Update and un-comment below line
# PROJECT_ID = "your-project-id"
vertexai.init(project=PROJECT_ID, location="us-central1")

response_schema = {
    "type": "ARRAY",
    "items": {
        "type": "ARRAY",
        "items": {
            "type": "OBJECT",
            "properties": {
                "object": {"type": "STRING"},
            },
        },
    },
}

model = GenerativeModel("gemini-1.5-pro-002")

response = model.generate_content(
    [
        # Text prompt
        "Generate a list of objects in the images.",
        # Http Image
        Part.from_uri(
            "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/office-desk.jpeg",
            "image/jpeg",
        ),
        # Cloud storage object
        Part.from_uri(
            "gs://cloud-samples-data/generative-ai/image/gardening-tools.jpeg",
            "image/jpeg",
        ),
    ],
    generation_config=GenerationConfig(
        response_mime_type="application/json", response_schema=response_schema
    ),
)

print(response.text)
# Example response:
# [
#     [
#         {"object": "globe"}, {"object": "tablet"}, {"object": "toy car"},
#         {"object": "airplane"}, {"object": "keyboard"}, {"object": "mouse"},
#         {"object": "passport"}, {"object": "sunglasses"}, {"object": "money"},
#         {"object": "notebook"}, {"object": "pen"}, {"object": "coffee cup"},
#     ],
#     [
#         {"object": "watering can"}, {"object": "plant"}, {"object": "flower pot"},
#         {"object": "gloves"}, {"object": "garden tool"},
#     ],
# ]

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.