Hacer seguimiento de objetos en un archivo de video local

Hacer un seguimiento de varios objetos detectados en un archivo de video almacenado de forma local

Explora más

Para obtener documentación en la que se incluye esta muestra de código, consulta lo siguiente:

Muestra de código

Go

Para autenticarte en Video Intelligence, configura las credenciales predeterminadas de la aplicación. Si deseas obtener más información, consulta Configura la autenticación para un entorno de desarrollo local.


import (
	"context"
	"fmt"
	"io"
	"os"

	video "cloud.google.com/go/videointelligence/apiv1"
	videopb "cloud.google.com/go/videointelligence/apiv1/videointelligencepb"
	"github.com/golang/protobuf/ptypes"
)

// objectTracking analyzes a video and extracts entities with their bounding boxes.
func objectTracking(w io.Writer, filename string) error {
	// filename := "../testdata/cat.mp4"

	ctx := context.Background()

	// Creates a client.
	client, err := video.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("video.NewClient: %w", err)
	}
	defer client.Close()

	fileBytes, err := os.ReadFile(filename)
	if err != nil {
		return err
	}

	op, err := client.AnnotateVideo(ctx, &videopb.AnnotateVideoRequest{
		InputContent: fileBytes,
		Features: []videopb.Feature{
			videopb.Feature_OBJECT_TRACKING,
		},
	})
	if err != nil {
		return fmt.Errorf("AnnotateVideo: %w", err)
	}

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	// Only one video was processed, so get the first result.
	result := resp.GetAnnotationResults()[0]

	for _, annotation := range result.ObjectAnnotations {
		fmt.Fprintf(w, "Description: %q\n", annotation.Entity.GetDescription())
		if len(annotation.Entity.EntityId) > 0 {
			fmt.Fprintf(w, "\tEntity ID: %q\n", annotation.Entity.GetEntityId())
		}

		segment := annotation.GetSegment()
		start, _ := ptypes.Duration(segment.GetStartTimeOffset())
		end, _ := ptypes.Duration(segment.GetEndTimeOffset())
		fmt.Fprintf(w, "\tSegment: %v to %v\n", start, end)

		fmt.Fprintf(w, "\tConfidence: %f\n", annotation.GetConfidence())

		// Here we print only the bounding box of the first frame in this segment.
		frame := annotation.GetFrames()[0]
		seconds := float32(frame.GetTimeOffset().GetSeconds())
		nanos := float32(frame.GetTimeOffset().GetNanos())
		fmt.Fprintf(w, "\tTime offset of the first frame: %fs\n", seconds+nanos/1e9)

		box := frame.GetNormalizedBoundingBox()
		fmt.Fprintf(w, "\tBounding box position:\n")
		fmt.Fprintf(w, "\t\tleft  : %f\n", box.GetLeft())
		fmt.Fprintf(w, "\t\ttop   : %f\n", box.GetTop())
		fmt.Fprintf(w, "\t\tright : %f\n", box.GetRight())
		fmt.Fprintf(w, "\t\tbottom: %f\n", box.GetBottom())
	}

	return nil
}

Java

Para autenticarte en Video Intelligence, configura las credenciales predeterminadas de la aplicación. Si deseas obtener más información, consulta Configura la autenticación para un entorno de desarrollo local.

/**
 * Track objects in a video.
 *
 * @param filePath the path to the video file to analyze.
 */
public static VideoAnnotationResults trackObjects(String filePath) throws Exception {
  try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
    // Read file
    Path path = Paths.get(filePath);
    byte[] data = Files.readAllBytes(path);

    // Create the request
    AnnotateVideoRequest request =
        AnnotateVideoRequest.newBuilder()
            .setInputContent(ByteString.copyFrom(data))
            .addFeatures(Feature.OBJECT_TRACKING)
            .setLocationId("us-east1")
            .build();

    // asynchronously perform object tracking on videos
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
        client.annotateVideoAsync(request);

    System.out.println("Waiting for operation to complete...");
    // The first result is retrieved because a single video was processed.
    AnnotateVideoResponse response = future.get(450, TimeUnit.SECONDS);
    VideoAnnotationResults results = response.getAnnotationResults(0);

    // Get only the first annotation for demo purposes.
    ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
    System.out.println("Confidence: " + annotation.getConfidence());

    if (annotation.hasEntity()) {
      Entity entity = annotation.getEntity();
      System.out.println("Entity description: " + entity.getDescription());
      System.out.println("Entity id:: " + entity.getEntityId());
    }

    if (annotation.hasSegment()) {
      VideoSegment videoSegment = annotation.getSegment();
      Duration startTimeOffset = videoSegment.getStartTimeOffset();
      Duration endTimeOffset = videoSegment.getEndTimeOffset();
      // Display the segment time in seconds, 1e9 converts nanos to seconds
      System.out.println(
          String.format(
              "Segment: %.2fs to %.2fs",
              startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
              endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
    }

    // Here we print only the bounding box of the first frame in this segment.
    ObjectTrackingFrame frame = annotation.getFrames(0);
    // Display the offset time in seconds, 1e9 converts nanos to seconds
    Duration timeOffset = frame.getTimeOffset();
    System.out.println(
        String.format(
            "Time offset of the first frame: %.2fs",
            timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));

    // Display the bounding box of the detected object
    NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
    System.out.println("Bounding box position:");
    System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
    System.out.println("\ttop: " + normalizedBoundingBox.getTop());
    System.out.println("\tright: " + normalizedBoundingBox.getRight());
    System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
    return results;
  }
}

Node.js

Para autenticarte en Video Intelligence, configura las credenciales predeterminadas de la aplicación. Si deseas obtener más información, consulta Configura la autenticación para un entorno de desarrollo local.

// Imports the Google Cloud Video Intelligence library
const Video = require('@google-cloud/video-intelligence');
const fs = require('fs');
const util = require('util');
// Creates a client
const video = new Video.VideoIntelligenceServiceClient();
/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

// Reads a local video file and converts it to base64
const file = await util.promisify(fs.readFile)(path);
const inputContent = file.toString('base64');

const request = {
  inputContent: inputContent,
  features: ['OBJECT_TRACKING'],
  //recommended to use us-east1 for the best latency due to different types of processors used in this region and others
  locationId: 'us-east1',
};
// Detects objects in a video
const [operation] = await video.annotateVideo(request);
const results = await operation.promise();
console.log('Waiting for operation to complete...');
//Gets annotations for video
const annotations = results[0].annotationResults[0];
const objects = annotations.objectAnnotations;
objects.forEach(object => {
  console.log(`Entity description:  ${object.entity.description}`);
  console.log(`Entity id: ${object.entity.entityId}`);
  const time = object.segment;
  console.log(
    `Segment: ${time.startTimeOffset.seconds || 0}` +
      `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s to ${
        time.endTimeOffset.seconds || 0
      }.` +
      `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
  );
  console.log(`Confidence: ${object.confidence}`);
  const frame = object.frames[0];
  const box = frame.normalizedBoundingBox;
  const timeOffset = frame.timeOffset;
  console.log(
    `Time offset for the first frame: ${timeOffset.seconds || 0}` +
      `.${(timeOffset.nanos / 1e6).toFixed(0)}s`
  );
  console.log('Bounding box position:');
  console.log(` left   :${box.left}`);
  console.log(` top    :${box.top}`);
  console.log(` right  :${box.right}`);
  console.log(` bottom :${box.bottom}`);
});

PHP

Para autenticarte en Video Intelligence, configura las credenciales predeterminadas de la aplicación. Si deseas obtener más información, consulta Configura la autenticación para un entorno de desarrollo local.

use Google\Cloud\VideoIntelligence\V1\AnnotateVideoRequest;
use Google\Cloud\VideoIntelligence\V1\Client\VideoIntelligenceServiceClient;
use Google\Cloud\VideoIntelligence\V1\Feature;

/**
 * @param string $path    File path to a video file to analyze
 * @param int $pollingIntervalSeconds
 */
function analyze_object_tracking_file(string $path, int $pollingIntervalSeconds = 0)
{
    # Instantiate a client.
    $video = new VideoIntelligenceServiceClient();

    # Read the local video file
    $inputContent = file_get_contents($path);

    # Execute a request.
    $features = [Feature::OBJECT_TRACKING];
    $request = (new AnnotateVideoRequest())
        ->setInputContent($inputContent)
        ->setFeatures($features);
    $operation = $video->annotateVideo($request);

    # Wait for the request to complete.
    $operation->pollUntilComplete([
        'pollingIntervalSeconds' => $pollingIntervalSeconds
    ]);

    # Print the results.
    if ($operation->operationSucceeded()) {
        $results = $operation->getResult()->getAnnotationResults()[0];
        # Process video/segment level label annotations
        $objectEntity = $results->getObjectAnnotations()[0];

        printf('Video object entity: %s' . PHP_EOL, $objectEntity->getEntity()->getEntityId());
        printf('Video object description: %s' . PHP_EOL, $objectEntity->getEntity()->getDescription());

        $start = $objectEntity->getSegment()->getStartTimeOffset();
        $end = $objectEntity->getSegment()->getEndTimeOffset();
        printf('  Segment: %ss to %ss' . PHP_EOL,
            $start->getSeconds() + $start->getNanos() / 1000000000.0,
            $end->getSeconds() + $end->getNanos() / 1000000000.0);
        printf('  Confidence: %f' . PHP_EOL, $objectEntity->getConfidence());

        foreach ($objectEntity->getFrames() as $objectEntityFrame) {
            $offset = $objectEntityFrame->getTimeOffset();
            $boundingBox = $objectEntityFrame->getNormalizedBoundingBox();
            printf('  Time offset: %ss' . PHP_EOL,
                $offset->getSeconds() + $offset->getNanos() / 1000000000.0);
            printf('  Bounding box position:' . PHP_EOL);
            printf('   Left: %s', $boundingBox->getLeft());
            printf('   Top: %s', $boundingBox->getTop());
            printf('   Right: %s', $boundingBox->getRight());
            printf('   Bottom: %s', $boundingBox->getBottom());
        }
        print(PHP_EOL);
    } else {
        print_r($operation->getError());
    }
}

Python

Para autenticarte en Video Intelligence, configura las credenciales predeterminadas de la aplicación. Si deseas obtener más información, consulta Configura la autenticación para un entorno de desarrollo local.

"""Object tracking in a local video."""
from google.cloud import videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.Feature.OBJECT_TRACKING]

with io.open(path, "rb") as file:
    input_content = file.read()

operation = video_client.annotate_video(
    request={"features": features, "input_content": input_content}
)
print("\nProcessing video for object annotations.")

result = operation.result(timeout=500)
print("\nFinished processing.\n")

# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations

# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print("Entity description: {}".format(object_annotation.entity.description))
if object_annotation.entity.entity_id:
    print("Entity id: {}".format(object_annotation.entity.entity_id))

print(
    "Segment: {}s to {}s".format(
        object_annotation.segment.start_time_offset.seconds
        + object_annotation.segment.start_time_offset.microseconds / 1e6,
        object_annotation.segment.end_time_offset.seconds
        + object_annotation.segment.end_time_offset.microseconds / 1e6,
    )
)

print("Confidence: {}".format(object_annotation.confidence))

# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print(
    "Time offset of the first frame: {}s".format(
        frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
    )
)
print("Bounding box position:")
print("\tleft  : {}".format(box.left))
print("\ttop   : {}".format(box.top))
print("\tright : {}".format(box.right))
print("\tbottom: {}".format(box.bottom))
print("\n")

¿Qué sigue?

Para buscar y filtrar muestras de código para otros productos de Google Cloud , consulta el navegador de muestras deGoogle Cloud .