Suivre des objets dans un fichier vidéo local

Effectuer le suivi de plusieurs objets détectés dans un fichier vidéo stocké localement

/// <summary>
/// Runs object tracking on a local video file and prints details of the first
/// tracked object: entity, segment, confidence, and the bounding box of the
/// first frame in which it appears.
/// </summary>
/// <param name="filePath">Path of the local video file whose bytes are sent
/// inline with the request.</param>
/// <returns>Always 0.</returns>
public static object TrackObject(string filePath)
{
    var client = VideoIntelligenceServiceClient.Create();
    var request = new AnnotateVideoRequest
    {
        InputContent = Google.Protobuf.ByteString.CopyFrom(File.ReadAllBytes(filePath)),
        Features = { Feature.ObjectTracking },
        // It is recommended to use location_id as 'us-east1' for the
        // best latency due to different types of processors used in
        // this region and others.
        LocationId = "us-east1"
    };

    Console.WriteLine("\nProcessing video for object annotations.");
    // Blocks until the long-running annotation operation has completed.
    var op = client.AnnotateVideo(request).PollUntilCompleted();

    Console.WriteLine("\nFinished processing.\n");

    // Retrieve first result because a single video was processed.
    var objectAnnotations = op.Result.AnnotationResults[0]
                              .ObjectAnnotations;

    // A video without any tracked object yields an empty list; indexing it
    // would throw, so report and stop instead.
    if (objectAnnotations.Count == 0)
    {
        Console.WriteLine("No objects were tracked in the video.");
        return 0;
    }

    // Get only the first annotation for demo purposes
    var objAnnotation = objectAnnotations[0];

    Console.WriteLine(
        $"Entity description: {objAnnotation.Entity.Description}");

    // Protobuf string fields are never null; an unset id is the empty string.
    if (!string.IsNullOrEmpty(objAnnotation.Entity.EntityId))
    {
        Console.WriteLine(
            $"Entity id: {objAnnotation.Entity.EntityId}");
    }

    Console.Write($"Segment: ");
    Console.WriteLine(
        String.Format("{0}s to {1}s",
                      objAnnotation.Segment.StartTimeOffset.Seconds +
                      objAnnotation.Segment.StartTimeOffset.Nanos / 1e9,
                      objAnnotation.Segment.EndTimeOffset.Seconds +
                      objAnnotation.Segment.EndTimeOffset.Nanos / 1e9));

    Console.WriteLine($"Confidence: {objAnnotation.Confidence}");

    if (objAnnotation.Frames.Count == 0)
    {
        Console.WriteLine("The annotation contains no frames.");
        return 0;
    }

    // Here we print only the bounding box of the first frame in this segment
    var frame = objAnnotation.Frames[0];
    var box = frame.NormalizedBoundingBox;
    Console.WriteLine(
        String.Format("Time offset of the first frame: {0}s",
                      frame.TimeOffset.Seconds +
                      frame.TimeOffset.Nanos / 1e9));
    Console.WriteLine("Bounding box positions:");
    Console.WriteLine($"\tleft   : {box.Left}");
    Console.WriteLine($"\ttop    : {box.Top}");
    Console.WriteLine($"\tright  : {box.Right}");
    Console.WriteLine($"\tbottom : {box.Bottom}");

    return 0;
}


import (
	"context"
	"fmt"
	"io"
	"io/ioutil"

	video "cloud.google.com/go/videointelligence/apiv1"
	"github.com/golang/protobuf/ptypes"
	videopb "google.golang.org/genproto/googleapis/cloud/videointelligence/v1"
)

// objectTracking analyzes a video and extracts entities with their bounding boxes.
func objectTracking(w io.Writer, filename string) error {
	// filename := "../testdata/cat.mp4"

	ctx := context.Background()

	// Creates a client.
	client, err := video.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("video.NewClient: %v", err)

	fileBytes, err := ioutil.ReadFile(filename)
	if err != nil {
		return err

	op, err := client.AnnotateVideo(ctx, &videopb.AnnotateVideoRequest{
		InputContent: fileBytes,
		Features: []videopb.Feature{
	if err != nil {
		return fmt.Errorf("AnnotateVideo: %v", err)

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %v", err)

	// Only one video was processed, so get the first result.
	result := resp.GetAnnotationResults()[0]

	for _, annotation := range result.ObjectAnnotations {
		fmt.Fprintf(w, "Description: %q\n", annotation.Entity.GetDescription())
		if len(annotation.Entity.EntityId) > 0 {
			fmt.Fprintf(w, "\tEntity ID: %q\n", annotation.Entity.GetEntityId())

		segment := annotation.GetSegment()
		start, _ := ptypes.Duration(segment.GetStartTimeOffset())
		end, _ := ptypes.Duration(segment.GetEndTimeOffset())
		fmt.Fprintf(w, "\tSegment: %v to %v\n", start, end)

		fmt.Fprintf(w, "\tConfidence: %f\n", annotation.GetConfidence())

		// Here we print only the bounding box of the first frame in this segment.
		frame := annotation.GetFrames()[0]
		seconds := float32(frame.GetTimeOffset().GetSeconds())
		nanos := float32(frame.GetTimeOffset().GetNanos())
		fmt.Fprintf(w, "\tTime offset of the first frame: %fs\n", seconds+nanos/1e9)

		box := frame.GetNormalizedBoundingBox()
		fmt.Fprintf(w, "\tBounding box position:\n")
		fmt.Fprintf(w, "\t\tleft  : %f\n", box.GetLeft())
		fmt.Fprintf(w, "\t\ttop   : %f\n", box.GetTop())
		fmt.Fprintf(w, "\t\tright : %f\n", box.GetRight())
		fmt.Fprintf(w, "\t\tbottom: %f\n", box.GetBottom())

	return nil


/**
 * Track objects in a video.
 *
 * @param filePath the path to the video file to analyze.
 */
public static VideoAnnotationResults trackObjects(String filePath) throws Exception {
  try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
    // Read file
    Path path = Paths.get(filePath);
    byte[] data = Files.readAllBytes(path);

    // Create the request
    AnnotateVideoRequest request =

    // asynchronously perform object tracking on videos
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =

    System.out.println("Waiting for operation to complete...");
    // The first result is retrieved because a single video was processed.
    AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
    VideoAnnotationResults results = response.getAnnotationResults(0);

    // Get only the first annotation for demo purposes.
    ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
    System.out.println("Confidence: " + annotation.getConfidence());

    if (annotation.hasEntity()) {
      Entity entity = annotation.getEntity();
      System.out.println("Entity description: " + entity.getDescription());
      System.out.println("Entity id:: " + entity.getEntityId());

    if (annotation.hasSegment()) {
      VideoSegment videoSegment = annotation.getSegment();
      Duration startTimeOffset = videoSegment.getStartTimeOffset();
      Duration endTimeOffset = videoSegment.getEndTimeOffset();
      // Display the segment time in seconds, 1e9 converts nanos to seconds
              "Segment: %.2fs to %.2fs",
              startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
              endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));

    // Here we print only the bounding box of the first frame in this segment.
    ObjectTrackingFrame frame = annotation.getFrames(0);
    // Display the offset time in seconds, 1e9 converts nanos to seconds
    Duration timeOffset = frame.getTimeOffset();
            "Time offset of the first frame: %.2fs",
            timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));

    // Display the bounding box of the detected object
    NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
    System.out.println("Bounding box position:");
    System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
    System.out.println("\ttop: " + normalizedBoundingBox.getTop());
    System.out.println("\tright: " + normalizedBoundingBox.getRight());
    System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
    return results;


// Imports the Google Cloud Video Intelligence library
const Video = require('@google-cloud/video-intelligence');
const fs = require('fs');
const util = require('util');
// Creates a client
const video = new Video.VideoIntelligenceServiceClient();
/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

/**
 * Formats a Duration-like object ({seconds, nanos}) as decimal seconds.
 *
 * Missing fields count as 0. Seconds and nanoseconds are summed numerically,
 * so 1 s + 50 ms prints as "1.050s" (string concatenation of the two parts
 * would drop the leading zeros of the fraction).
 *
 * @param {{seconds?: *, nanos?: number}} duration offset to format
 * @returns {string} the offset with millisecond precision and an "s" suffix
 */
function formatSeconds(duration) {
  const seconds = Number((duration && duration.seconds) || 0);
  const nanos = Number((duration && duration.nanos) || 0);
  return `${(seconds + nanos / 1e9).toFixed(3)}s`;
}

/**
 * Sends the local video at `path` for object tracking and logs, for every
 * tracked object, its entity, segment, confidence and the bounding box of
 * its first frame.
 *
 * `await` is not allowed at the top level of a CommonJS module, hence the
 * async wrapper.
 */
async function trackObjects() {
  // Reads a local video file and converts it to base64
  const file = await util.promisify(fs.readFile)(path);
  const inputContent = file.toString('base64');

  const request = {
    inputContent: inputContent,
    features: ['OBJECT_TRACKING'],
    //recommended to use us-east1 for the best latency due to different types of processors used in this region and others
    locationId: 'us-east1',
  };
  // Detects objects in a video
  const [operation] = await video.annotateVideo(request);
  // Announce the wait before blocking on the long-running operation.
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();
  //Gets annotations for video
  const annotations = results[0].annotationResults[0];
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description:  ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    const time = object.segment;
    console.log(
      `Segment: ${formatSeconds(time.startTimeOffset)} to ` +
        `${formatSeconds(time.endTimeOffset)}`
    );
    console.log(`Confidence: ${object.confidence}`);
    // Only the first frame of the segment is reported.
    const frame = object.frames[0];
    const box = frame.normalizedBoundingBox;
    const timeOffset = frame.timeOffset;
    console.log(
      `Time offset for the first frame: ${formatSeconds(timeOffset)}`
    );
    console.log('Bounding box position:');
    console.log(` left   :${box.left}`);
    console.log(` top    :${box.top}`);
    console.log(` right  :${box.right}`);
    console.log(` bottom :${box.bottom}`);
  });
}

trackObjects().catch(err => {
  console.error(err);
  process.exitCode = 1;
});


use Google\Cloud\VideoIntelligence\V1\VideoIntelligenceServiceClient;
use Google\Cloud\VideoIntelligence\V1\Feature;

/** Uncomment and populate these variables in your code */
// $path = 'File path to a video file to analyze';
// $options = [];

# Instantiate a client.
$video = new VideoIntelligenceServiceClient();

# Read the local video file
$inputContent = file_get_contents($path);
if ($inputContent === false) {
    # file_get_contents() returns false on failure; do not send that as content.
    throw new \RuntimeException(sprintf('Unable to read video file: %s', $path));
}

# Execute a request.
$operation = $video->annotateVideo([
    'inputContent' => $inputContent,
    'features' => [Feature::OBJECT_TRACKING]
]);

# Wait for the request to complete.
$operation->pollUntilComplete($options);

# Print the results.
if ($operation->operationSucceeded()) {
    # Only one video was processed, so take the first result.
    $results = $operation->getResult()->getAnnotationResults()[0];
    # Report only the first tracked object.
    $objectEntity = $results->getObjectAnnotations()[0];

    printf('Video object entity: %s' . PHP_EOL, $objectEntity->getEntity()->getEntityId());
    printf('Video object description: %s' . PHP_EOL, $objectEntity->getEntity()->getDescription());

    $start = $objectEntity->getSegment()->getStartTimeOffset();
    $end = $objectEntity->getSegment()->getEndTimeOffset();
    # Dividing nanoseconds by 1e9 converts them to fractional seconds.
    printf('  Segment: %ss to %ss' . PHP_EOL,
        $start->getSeconds() + $start->getNanos()/1000000000.0,
        $end->getSeconds() + $end->getNanos()/1000000000.0);
    printf('  Confidence: %f' . PHP_EOL, $objectEntity->getConfidence());

    foreach ($objectEntity->getFrames() as $objectEntityFrame) {
        $offset = $objectEntityFrame->getTimeOffset();
        $boundingBox = $objectEntityFrame->getNormalizedBoundingBox();
        printf('  Time offset: %ss' . PHP_EOL,
            $offset->getSeconds() + $offset->getNanos()/1000000000.0);
        printf('  Bounding box position:' . PHP_EOL);
        # Each coordinate goes on its own line; without PHP_EOL the four
        # values and the next frame's header run together on one line.
        printf('   Left: %s' . PHP_EOL, $boundingBox->getLeft());
        printf('   Top: %s' . PHP_EOL, $boundingBox->getTop());
        printf('   Right: %s' . PHP_EOL, $boundingBox->getRight());
        printf('   Bottom: %s' . PHP_EOL, $boundingBox->getBottom());
    }
} else {
    print_r($operation->getError());
}


"""Object tracking in a local video."""
from google.cloud import videointelligence

# Tracks objects in the local video file at `path` and prints details of the
# first tracked object. `path` must be defined before this code runs.
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.Feature.OBJECT_TRACKING]

# The built-in open() is used so that no separate `io` import is required.
with open(path, "rb") as file:
    input_content = file.read()

operation = video_client.annotate_video(
    request={"features": features, "input_content": input_content}
)
print("\nProcessing video for object annotations.")

# Block for at most 300 seconds while the long-running operation completes.
result = operation.result(timeout=300)
print("\nFinished processing.\n")

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print("Entity description: {}".format(object_annotation.entity.description))
if object_annotation.entity.entity_id:
    print("Entity id: {}".format(object_annotation.entity.entity_id))

# total_seconds() also accounts for the `days` component, which
# `seconds + microseconds / 1e6` silently drops.
print(
    "Segment: {}s to {}s".format(
        object_annotation.segment.start_time_offset.total_seconds(),
        object_annotation.segment.end_time_offset.total_seconds(),
    )
)

print("Confidence: {}".format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print(
    "Time offset of the first frame: {}s".format(
        frame.time_offset.total_seconds()
    )
)
print("Bounding box position:")
print("\tleft  : {}".format(box.left))
print("\ttop   : {}".format(box.top))
print("\tright : {}".format(box.right))
print("\tbottom: {}".format(box.bottom))


# path = "Path to a local video file: path/to/file.mp4"

require "google/cloud/video_intelligence"

# Tracks objects in the local video file at `path` and hands the resulting
# annotations to print_object_annotations (defined elsewhere).
video = Google::Cloud::VideoIntelligence.video_intelligence_service

# Read the whole file in binary mode; the bytes are sent inline.
video_contents = File.binread path

# Start the long-running annotation operation.
operation = video.annotate_video features: [:OBJECT_TRACKING], input_content: video_contents

puts "Processing video for object tracking:"
# Block until the operation has finished; results are not available before.
operation.wait_until_done!

# On failure, raise with the message of the operation's error status.
raise operation.error.message if operation.error?
puts "Finished Processing."

# Only one video was processed, so take the first annotation result.
object_annotations = operation.results.annotation_results.first.object_annotations
print_object_annotations object_annotations