为一批图片异步添加注释(Beta 版)

以离线(异步)方式为存储在 Cloud Storage 中的一批图片文件添加注释。

代码示例

Java

试用此示例之前,请按照《Vision 快速入门:使用客户端库》中的 Java 设置说明进行操作。如需了解详情,请参阅 Vision Java API 参考文档。

如需向 Vision 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅“为本地开发环境设置身份验证”。

import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Storage.BlobListOption;
import com.google.cloud.storage.StorageOptions;
import com.google.cloud.vision.v1p4beta1.AnnotateImageRequest;
import com.google.cloud.vision.v1p4beta1.AsyncBatchAnnotateImagesRequest;
import com.google.cloud.vision.v1p4beta1.AsyncBatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p4beta1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1p4beta1.BatchAnnotateImagesResponse.Builder;
import com.google.cloud.vision.v1p4beta1.Feature;
import com.google.cloud.vision.v1p4beta1.Feature.Type;
import com.google.cloud.vision.v1p4beta1.GcsDestination;
import com.google.cloud.vision.v1p4beta1.Image;
import com.google.cloud.vision.v1p4beta1.ImageAnnotatorClient;
import com.google.cloud.vision.v1p4beta1.ImageSource;
import com.google.cloud.vision.v1p4beta1.OperationMetadata;
import com.google.cloud.vision.v1p4beta1.OutputConfig;
import com.google.protobuf.util.JsonFormat;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AsyncBatchAnnotateImagesGcs {

  // Performs asynchronous batch annotation of images on Google Cloud Storage
  public static void asyncBatchAnnotateImagesGcs(String gcsSourcePath, String gcsDestinationPath)
      throws Exception {
    // String gcsSourcePath = "gs://YOUR_BUCKET_ID/path_to_your_data";
    // String gcsDestinationPath = "gs://YOUR_BUCKET_ID/path_to_store_annotation";
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      List<AnnotateImageRequest> requests = new ArrayList<>();

      ImageSource imgSource = ImageSource.newBuilder().setImageUri(gcsSourcePath).build();

      Image image = Image.newBuilder().setSource(imgSource).build();

      // Set the GCS destination path for where to save the results.
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setUri(gcsDestinationPath).build();

      // Create the configuration for the output with the batch size.
      // The batch size sets how many pages should be grouped into each json output file.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).setBatchSize(2).build();

      // Select the Features required by the vision API
      Feature features =
          Feature.newBuilder()
              .setType(Type.LABEL_DETECTION)
              .setType(Type.TEXT_DETECTION)
              .setType(Type.IMAGE_PROPERTIES)
              .build();

      // Build the request
      AnnotateImageRequest annotateImageRequest =
          AnnotateImageRequest.newBuilder().setImage(image).addFeatures(features).build();

      requests.add(annotateImageRequest);
      AsyncBatchAnnotateImagesRequest request =
          AsyncBatchAnnotateImagesRequest.newBuilder()
              .addAllRequests(requests)
              .setOutputConfig(outputConfig)
              .build();

      OperationFuture<AsyncBatchAnnotateImagesResponse, OperationMetadata> response =
          client.asyncBatchAnnotateImagesAsync(request);
      System.out.println("Waiting for the operation to finish.");

      // we're not processing the response, since we'll be reading the output from GCS.
      response.get(180, TimeUnit.SECONDS);

      // Once the request has completed and the output has been
      // written to GCS, we can list all the output files.
      Storage storage = StorageOptions.getDefaultInstance().getService();

      // Get the destination location from the gcsDestinationPath
      Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
      Matcher matcher = pattern.matcher(gcsDestinationPath);

      if (matcher.find()) {
        String bucketName = matcher.group(1);
        String prefix = matcher.group(2);

        // Get the list of objects with the given prefix from the GCS bucket
        Bucket bucket = storage.get(bucketName);
        Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));

        Blob firstOutputFile = null;

        // List objects with the given prefix.
        System.out.println("Output files:");
        for (Blob blob : pageList.iterateAll()) {
          System.out.println(blob.getName());

          // Process the first output file from GCS.
          // Since we specified batch size = 2, the first response contains
          // the first two image requests
          if (firstOutputFile == null) {
            firstOutputFile = blob;
          }
        }

        // Get the contents of the file and convert the JSON contents to an
        // BatchAnnotateImagesResponse
        // object. If the Blob is small read all its content in one request
        // (Note: the file is a .json file)
        // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
        String jsonContents = new String(firstOutputFile.getContent());
        Builder builder = BatchAnnotateImagesResponse.newBuilder();
        JsonFormat.parser().merge(jsonContents, builder);

        // Build the AnnotateFileResponse object
        BatchAnnotateImagesResponse batchAnnotateImagesResponse = builder.build();

        // Here we print the response for the first image
        // The response contains more information:
        // annotation/pages/blocks/paragraphs/words/symbols/colors
        // including confidence score and bounding boxes
        System.out.format("\nResponse: %s\n", batchAnnotateImagesResponse.getResponses(0));

      } else {
        System.out.println("No MATCH");
      }
    } catch (Exception e) {
      System.out.println("Error during asyncBatchAnnotateImagesGcs: \n" + e.toString());
    }
  }
}

Python

试用此示例之前,请按照《Vision 快速入门:使用客户端库》中的 Python 设置说明进行操作。如需了解详情,请参阅 Vision Python API 参考文档。

如需向 Vision 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅“为本地开发环境设置身份验证”。

def async_batch_annotate_images_uri(input_image_uri, output_uri):
    """Batch annotation of images on Google Cloud Storage asynchronously.

    Starts an asynchronous batch annotation (label detection, text detection
    and image properties) for one image, waits for the operation to finish,
    lists the output files written under ``output_uri`` and prints the first
    annotation response found in the first output file.

    Args:
        input_image_uri: The path to the image in Google Cloud Storage (gs://...)
        output_uri: The path to the output path in Google Cloud Storage (gs://...)

    Raises:
        ValueError: If ``output_uri`` is not of the form ``gs://bucket/prefix``.
        IndexError: If no output file exists under ``output_uri`` once the
            operation has finished.
    """
    import re

    # Parse the destination first: a malformed URI should fail immediately with
    # a clear message rather than after the long-running operation completes.
    match = re.match(r"gs://([^/]+)/(.+)", output_uri)
    if match is None:
        raise ValueError(
            "output_uri must look like 'gs://bucket/prefix', got: {!r}".format(
                output_uri
            )
        )
    bucket_name = match.group(1)
    prefix = match.group(2)

    from google.cloud import storage

    from google.cloud import vision_v1p4beta1 as vision

    client = vision.ImageAnnotatorClient()

    # Construct the request for the image(s) to be annotated:
    image_source = vision.ImageSource(image_uri=input_image_uri)
    image = vision.Image(source=image_source)
    features = [
        vision.Feature(type_=vision.Feature.Type.LABEL_DETECTION),
        vision.Feature(type_=vision.Feature.Type.TEXT_DETECTION),
        vision.Feature(type_=vision.Feature.Type.IMAGE_PROPERTIES),
    ]
    requests = [
        vision.AnnotateImageRequest(image=image, features=features),
    ]

    gcs_destination = vision.GcsDestination(uri=output_uri)
    output_config = vision.OutputConfig(gcs_destination=gcs_destination, batch_size=2)

    operation = client.async_batch_annotate_images(
        requests=requests, output_config=output_config
    )

    print("Waiting for the operation to finish.")
    operation.result(timeout=10000)

    # Once the request has completed and the output has been
    # written to Google Cloud Storage, we can list all the output files.
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # Lists objects with the given prefix.
    blob_list = list(bucket.list_blobs(prefix=prefix))
    print("Output files:")
    for blob in blob_list:
        print(blob.name)

    # Same exception type an empty list would raise on indexing, but with a
    # message that says what is actually missing.
    if not blob_list:
        raise IndexError("No output files found under {}".format(output_uri))

    # Processes the first output file from Google Cloud Storage.
    # Since we specified batch_size=2, the first response contains
    # annotations for the first two annotate image requests.
    output = blob_list[0]

    json_string = output.download_as_bytes().decode("utf-8")
    response = vision.BatchAnnotateImagesResponse.from_json(json_string)

    # Prints the actual response for the first annotate image request.
    print(
        "The annotation response for the first request: {}".format(
            response.responses[0]
        )
    )

后续步骤

如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器