Object tracking

Object tracking tracks multiple objects detected in an input video.

The following code samples demonstrate how to perform object tracking on a video using the streaming client library.

Java

import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;

class StreamingObjectTracking {

  /**
   * Performs streaming object tracking on a local video file and prints the results.
   *
   * <p>The whole file is read into memory and sent to the service in chunks of at most 5 MB.
   * For every object annotation in every response, the entity description, confidence, track
   * ID, frame time offset and normalized bounding box are written to standard output.
   *
   * <p>An {@link IOException} raised while creating the client or reading the file is caught
   * and its stack trace is printed; it is not propagated to the caller.
   *
   * @param filePath path of the local video file to analyze
   */
  static void streamingObjectTracking(String filePath) {
    // String filePath = "path_to_your_video_file";

    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {

      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      // Select the object-tracking feature; no feature-specific config is attached.
      StreamingVideoConfig streamingVideoConfig = StreamingVideoConfig.newBuilder()
          .setFeature(StreamingFeature.STREAMING_OBJECT_TRACKING)
          .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder()
              .setVideoConfig(streamingVideoConfig)
              .build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks
      for (int i = 0; i < numChunks; i++) {
        int offset = i * chunkSize;
        // Clamp the final chunk to the bytes that remain so no zero padding is
        // appended after the end of the file.
        int length = Math.min(chunkSize, data.length - offset);
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(ByteString.copyFrom(data, offset, length))
                .build());
      }

      // Tell the service you are done sending data
      call.closeSend();

      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();

        for (ObjectTrackingAnnotation objectAnnotations :
            annotationResults.getObjectAnnotationsList()) {

          String entity = objectAnnotations.getEntity().getDescription();
          float confidence = objectAnnotations.getConfidence();
          long trackId = objectAnnotations.getTrackId();
          System.out.format("%s: %f (ID: %d)\n", entity, confidence, trackId);

          // In streaming, there is always one frame.
          ObjectTrackingFrame frame = objectAnnotations.getFrames(0);
          // Combine whole seconds and nanoseconds into fractional seconds.
          double offset = frame.getTimeOffset().getSeconds()
              + frame.getTimeOffset().getNanos() / 1e9;
          System.out.format("Offset: %f\n", offset);

          System.out.println("Bounding Box:");
          System.out.format("\tLeft: %f\n", frame.getNormalizedBoundingBox().getLeft());
          System.out.format("\tTop: %f\n", frame.getNormalizedBoundingBox().getTop());
          System.out.format("\tRight: %f\n", frame.getNormalizedBoundingBox().getRight());
          System.out.format("\tBottom: %f\n", frame.getNormalizedBoundingBox().getBottom());
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}

Node.js

/**
 * Streams a local video file to the Video Intelligence API with the
 * STREAMING_OBJECT_TRACKING feature and logs every tracked object.
 *
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';
const {
  StreamingVideoIntelligenceServiceClient,
} = require('@google-cloud/video-intelligence').v1p3beta1;
const fs = require('fs');

// Instantiates a client
const client = new StreamingVideoIntelligenceServiceClient();
// Streaming configuration
const configRequest = {
  videoConfig: {
    feature: 'STREAMING_OBJECT_TRACKING',
  },
};
const readStream = fs.createReadStream(path, {
  highWaterMark: 5 * 1024 * 1024, //chunk size set to 5MB (recommended less than 10MB)
  encoding: 'base64',
});
//Load file content
const chunks = [];
readStream
  .on('data', chunk => {
    // Each chunk arrives as a base64 string because of the stream encoding.
    const request = {
      inputContent: chunk.toString(),
    };
    chunks.push(request);
  })
  .on('close', function() {
    // configRequest should be the first in the stream of requests
    stream.write(configRequest);
    for (let i = 0; i < chunks.length; i++) {
      stream.write(chunks[i]);
    }
    stream.end();
  });

// `stream` is referenced by the 'close' handler above, which only runs after
// this declaration has been evaluated.
const stream = client.streamingAnnotateVideo().on('data', response => {
  //Gets annotations for video
  const annotations = response.annotationResults;
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    console.log(`Track id: ${object.trackId}`);
    console.log(`Confidence: ${object.confidence}`);
    //Every annotation has only one frame.
    const frame = object.frames[0];
    const timeOffset = frame.timeOffset;
    // Zero-pad the millisecond part to three digits so that, for example,
    // 5 ms is printed as "0.005s" rather than "0.5s". A missing nanos field
    // is treated as 0.
    const millis = Math.floor((timeOffset.nanos || 0) / 1e6);
    const paddedMillis = ('00' + millis).slice(-3);
    console.log(
      `Time offset for the frame: ${timeOffset.seconds || 0}` +
        `.${paddedMillis}s`
    );
    const box = frame.normalizedBoundingBox;
    console.log(`Bounding box position:`);
    console.log(` left  :${box.left}`);
    console.log(` top   :${box.top}`);
    console.log(` right :${box.right}`);
    console.log(` bottom:${box.bottom}`);
  });
});

Python

import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set streaming config: select the object-tracking feature.
config = videointelligence.types.StreamingVideoConfig(
    feature=(videointelligence.enums.
             StreamingFeature.STREAMING_OBJECT_TRACKING))

# config_request should be the first in the stream of requests.
config_request = videointelligence.types.StreamingAnnotateVideoRequest(
    video_config=config)

# Set the chunk size to 5MB (recommended less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load file content: read the file in binary mode, chunk_size bytes at a
# time, and keep every chunk in memory in order.
stream = []
with io.open(path, 'rb') as video_file:
    while True:
        data = video_file.read(chunk_size)
        # An empty read means the end of the file has been reached.
        if not data:
            break
        stream.append(data)

def stream_generator():
    """Yield the config request first, then one request per video chunk."""
    yield config_request
    for video_chunk in stream:
        chunk_request = videointelligence.types.StreamingAnnotateVideoRequest(
            input_content=video_chunk)
        yield chunk_request

requests = stream_generator()

# streaming_annotate_video returns a generator.
# The default timeout is about 300 seconds.
# To process longer videos it should be set to
# larger than the length (in seconds) of the stream.
responses = client.streaming_annotate_video(requests, timeout=600)

# Each response corresponds to about 1 second of video.
for response in responses:
    # Check for errors: print the message and stop reading responses.
    if response.error.message:
        print(response.error.message)
        break

    object_annotations = response.annotation_results.object_annotations

    # object_annotations could be empty
    if not object_annotations:
        continue

    for annotation in object_annotations:
        # Each annotation has exactly one frame, which carries both the
        # time offset and the bounding box.
        frame = annotation.frames[0]
        # Combine whole seconds and nanoseconds into fractional seconds.
        time_offset = frame.time_offset.seconds + \
            frame.time_offset.nanos / 1e9

        description = annotation.entity.description
        confidence = annotation.confidence

        # track_id tracks the same object in the video.
        track_id = annotation.track_id

        # description is in Unicode
        print('{}s'.format(time_offset))
        print(u'\tEntity description: {}'.format(description))
        print('\tTrack Id: {}'.format(track_id))
        # The entity id is only printed when it is non-empty.
        if annotation.entity.entity_id:
            print('\tEntity id: {}'.format(annotation.entity.entity_id))

        print('\tConfidence: {}'.format(confidence))

        box = frame.normalized_bounding_box
        print('\tBounding box position:')
        print('\tleft  : {}'.format(box.left))
        print('\ttop   : {}'.format(box.top))
        print('\tright : {}'.format(box.right))
        print('\tbottom: {}\n'.format(box.bottom))

هل كانت هذه الصفحة مفيدة؟ يرجى تقييم أدائنا:

إرسال تعليقات حول...

Cloud Video Intelligence API Documentation