Track objects in a streaming video

Tracks multiple objects detected in a streaming video file.

Code sample

Java


import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import io.grpc.StatusRuntimeException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;

class StreamingObjectTracking {

  // Perform streaming video object tracking
  static void streamingObjectTracking(String filePath)
      throws IOException, TimeoutException, StatusRuntimeException {
    // String filePath = "path_to_your_video_file";

    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {

      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      StreamingLabelDetectionConfig labelConfig =
          StreamingLabelDetectionConfig.newBuilder().setStationaryCamera(false).build();

      StreamingVideoConfig streamingVideoConfig =
          StreamingVideoConfig.newBuilder()
              .setFeature(StreamingFeature.STREAMING_OBJECT_TRACKING)
              .setLabelDetectionConfig(labelConfig)
              .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder().setVideoConfig(streamingVideoConfig).build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks.
      for (int i = 0; i < numChunks; i++) {
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(
                    ByteString.copyFrom(
                        // Cap the final chunk at the end of the file to avoid zero-padding.
                        Arrays.copyOfRange(
                            data, i * chunkSize, Math.min(data.length, (i + 1) * chunkSize))))
                .build());
      }

      // Tell the service you are done sending data
      call.closeSend();

      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();

        for (ObjectTrackingAnnotation objectAnnotations :
            annotationResults.getObjectAnnotationsList()) {

          String entity = objectAnnotations.getEntity().getDescription();
          float confidence = objectAnnotations.getConfidence();
          long trackId = objectAnnotations.getTrackId();
          System.out.format("%s: %f (ID: %d)\n", entity, confidence, trackId);

          // In streaming, there is always one frame.
          ObjectTrackingFrame frame = objectAnnotations.getFrames(0);
          double offset =
              frame.getTimeOffset().getSeconds() + frame.getTimeOffset().getNanos() / 1e9;
          System.out.format("Offset: %f\n", offset);

          System.out.println("Bounding Box:");
          System.out.format("\tLeft: %f\n", frame.getNormalizedBoundingBox().getLeft());
          System.out.format("\tTop: %f\n", frame.getNormalizedBoundingBox().getTop());
          System.out.format("\tRight: %f\n", frame.getNormalizedBoundingBox().getRight());
          System.out.format("\tBottom: %f\n", frame.getNormalizedBoundingBox().getBottom());
        }
      }
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';
const {StreamingVideoIntelligenceServiceClient} =
  require('@google-cloud/video-intelligence').v1p3beta1;
const fs = require('fs');

// Instantiates a client
const client = new StreamingVideoIntelligenceServiceClient();
// Streaming configuration
const configRequest = {
  videoConfig: {
    feature: 'STREAMING_OBJECT_TRACKING',
  },
};
const readStream = fs.createReadStream(path, {
  // Chunk size set to 5MB (recommended less than 10MB).
  highWaterMark: 5 * 1024 * 1024,
  encoding: 'base64',
});
// Load file content
const chunks = [];
readStream
  .on('data', chunk => {
    const request = {
      inputContent: chunk.toString(),
    };
    chunks.push(request);
  })
  .on('close', () => {
    // configRequest should be the first in the stream of requests
    stream.write(configRequest);
    for (let i = 0; i < chunks.length; i++) {
      stream.write(chunks[i]);
    }
    stream.end();
  });

const options = {timeout: 120000};
// Open the bidirectional streaming call and print annotations as responses arrive

const stream = client.streamingAnnotateVideo(options).on('data', response => {
  // Gets annotations for the video
  const annotations = response.annotationResults;
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    console.log(`Track id: ${object.trackId}`);
    console.log(`Confidence: ${object.confidence}`);
    // Convert the frame's time offset (seconds + nanos) into fractional seconds.
    const timeOffset = object.frames[0].timeOffset;
    const offsetSeconds =
      Number(timeOffset.seconds || 0) + (timeOffset.nanos || 0) / 1e9;
    console.log(`Time offset for the frame: ${offsetSeconds.toFixed(3)}s`);
    // Every annotation has only one frame.
    const box = object.frames[0].normalizedBoundingBox;
    console.log('Bounding box position:');
    console.log(` left  :${box.left}`);
    console.log(` top   :${box.top}`);
    console.log(` right :${box.right}`);
    console.log(` bottom:${box.bottom}`);
  });
});

Python

import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set streaming config.
config = videointelligence.StreamingVideoConfig(
    feature=(videointelligence.StreamingFeature.STREAMING_OBJECT_TRACKING)
)

# config_request should be the first in the stream of requests.
config_request = videointelligence.StreamingAnnotateVideoRequest(
    video_config=config
)

# Set the chunk size to 5MB (recommended less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load file content.
stream = []
with io.open(path, "rb") as video_file:
    while True:
        data = video_file.read(chunk_size)
        if not data:
            break
        stream.append(data)

def stream_generator():
    yield config_request
    for chunk in stream:
        yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)

requests = stream_generator()

# streaming_annotate_video returns a generator.
# The default timeout is about 300 seconds.
# To process longer videos it should be set to
# larger than the length (in seconds) of the stream.
responses = client.streaming_annotate_video(requests, timeout=900)

# Each response corresponds to about 1 second of video.
for response in responses:
    # Check for errors.
    if response.error.message:
        print(response.error.message)
        break

    object_annotations = response.annotation_results.object_annotations

    # object_annotations could be empty
    if not object_annotations:
        continue

    for annotation in object_annotations:
        # Each annotation has one frame, which has a time offset.
        frame = annotation.frames[0]
        time_offset = (
            frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
        )

        description = annotation.entity.description
        confidence = annotation.confidence

        # track_id tracks the same object in the video.
        track_id = annotation.track_id

        print("{}s".format(time_offset))
        # description is in Unicode
        print(u"\tEntity description: {}".format(description))
        print("\tTrack Id: {}".format(track_id))
        if annotation.entity.entity_id:
            print("\tEntity id: {}".format(annotation.entity.entity_id))

        print("\tConfidence: {}".format(confidence))

        # Bounding box of the object in this frame.
        box = frame.normalized_bounding_box
        print("\tBounding box position:")
        print("\tleft  : {}".format(box.left))
        print("\ttop   : {}".format(box.top))
        print("\tright : {}".format(box.right))
        print("\tbottom: {}\n".format(box.bottom))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.