import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.videointelligence.v1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1.AnnotateVideoResponse;
import com.google.cloud.videointelligence.v1.Entity;
import com.google.cloud.videointelligence.v1.Feature;
import com.google.cloud.videointelligence.v1.NormalizedBoundingBox;
import com.google.cloud.videointelligence.v1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
import com.google.cloud.videointelligence.v1.VideoSegment;
import com.google.protobuf.ByteString;
import com.google.protobuf.Duration;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.TimeUnit;

/**
 * Track objects in a video.
 *
 * @param filePath the path to the video file to analyze.
 */
public static VideoAnnotationResults trackObjects(String filePath) throws Exception {
  try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
    // Read the video file into memory
    Path path = Paths.get(filePath);
    byte[] data = Files.readAllBytes(path);

    // Create the request
    AnnotateVideoRequest request =
        AnnotateVideoRequest.newBuilder()
            .setInputContent(ByteString.copyFrom(data))
            .addFeatures(Feature.OBJECT_TRACKING)
            .setLocationId("us-east1")
            .build();
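    // Note: for a video stored in Cloud Storage, the request can instead set
    // setInputUri("gs://bucket/video.mp4") rather than setInputContent;
    // the two fields are alternatives.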
    // Asynchronously perform object tracking on the video
    OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
        client.annotateVideoAsync(request);

    System.out.println("Waiting for operation to complete...");
    AnnotateVideoResponse response = future.get(450, TimeUnit.SECONDS);

    // The first result is retrieved because a single video was processed.
    VideoAnnotationResults results = response.getAnnotationResults(0);

    // Get only the first annotation for demo purposes.
    ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
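    // A production application would typically iterate over all annotations
    // via results.getObjectAnnotationsList() rather than reading one entry.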
System.out.println("Confidence: " + annotation.getConfidence());
if (annotation.hasEntity()) {
Entity entity = annotation.getEntity();
System.out.println("Entity description: " + entity.getDescription());
System.out.println("Entity id:: " + entity.getEntityId());
}
if (annotation.hasSegment()) {
VideoSegment videoSegment = annotation.getSegment();
Duration startTimeOffset = videoSegment.getStartTimeOffset();
Duration endTimeOffset = videoSegment.getEndTimeOffset();
// Display the segment time in seconds, 1e9 converts nanos to seconds
System.out.println(
String.format(
"Segment: %.2fs to %.2fs",
startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
}
    // Here we print only the bounding box of the first frame in this segment.
    ObjectTrackingFrame frame = annotation.getFrames(0);

    // Display the frame's time offset in seconds; 1e9 converts nanos to seconds
    Duration timeOffset = frame.getTimeOffset();
    System.out.println(
        String.format(
            "Time offset of the first frame: %.2fs",
            timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));

    // Display the bounding box of the detected object
    NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
    System.out.println("Bounding box position:");
    System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
    System.out.println("\ttop: " + normalizedBoundingBox.getTop());
    System.out.println("\tright: " + normalizedBoundingBox.getRight());
    System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());

    return results;
  }
}
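For reference, a minimal sketch of invoking the method above from a main entry point. The wrapping class name and the video path are assumptions, not part of the original sample, and trackObjects is assumed to be defined in the same class:

public class TrackObjectsDemo {

  public static void main(String[] args) throws Exception {
    // Hypothetical local video path; replace with a real file.
    VideoAnnotationResults results = trackObjects("resources/cat.mp4");
    System.out.println("Annotations returned: " + results.getObjectAnnotationsCount());
  }
}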
// Imports the Google Cloud Video Intelligence library
const Video = require('@google-cloud/video-intelligence');
const fs = require('fs');
const util = require('util');

// Creates a client
const video = new Video.VideoIntelligenceServiceClient();

/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

// Reads a local video file and converts it to base64
const file = await util.promisify(fs.readFile)(path);
const inputContent = file.toString('base64');

const request = {
  inputContent: inputContent,
  features: ['OBJECT_TRACKING'],
  // us-east1 is recommended for the best latency, because of the
  // processor types available in this region
  locationId: 'us-east1',
};
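// Note: for a video stored in Cloud Storage, set `inputUri`
// (e.g. 'gs://bucket/video.mp4') in the request instead of `inputContent`;
// the API accepts one or the other.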
// Detects objects in a video
const [operation] = await video.annotateVideo(request);
console.log('Waiting for operation to complete...');
const results = await operation.promise();

// Gets annotations for the video
const annotations = results[0].annotationResults[0];
const objects = annotations.objectAnnotations;

objects.forEach(object => {
  console.log(`Entity description: ${object.entity.description}`);
  console.log(`Entity id: ${object.entity.entityId}`);

  // Display the segment time in seconds (`seconds` plus `nanos` / 1e9)
  const time = object.segment;
  const segmentStart =
    Number(time.startTimeOffset.seconds || 0) +
    (time.startTimeOffset.nanos || 0) / 1e9;
  const segmentEnd =
    Number(time.endTimeOffset.seconds || 0) +
    (time.endTimeOffset.nanos || 0) / 1e9;
  console.log(
    `Segment: ${segmentStart.toFixed(2)}s to ${segmentEnd.toFixed(2)}s`
  );

  console.log(`Confidence: ${object.confidence}`);

  // Here we print only the bounding box of the first frame in this segment
  const frame = object.frames[0];
  const box = frame.normalizedBoundingBox;
  const timeOffset =
    Number(frame.timeOffset.seconds || 0) + (frame.timeOffset.nanos || 0) / 1e9;
  console.log(`Time offset for the first frame: ${timeOffset.toFixed(2)}s`);

  console.log('Bounding box position:');
  console.log(`  left  : ${box.left}`);
  console.log(`  top   : ${box.top}`);
  console.log(`  right : ${box.right}`);
  console.log(`  bottom: ${box.bottom}`);
});
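Note that the snippet above uses await at the top level, which is only valid inside an async function or an ES module. A minimal sketch of the wrapper the sample body would run in; the function name main is an assumption:

// Sketch only: paste the sample body (from the file read onward) into this wrapper.
async function main() {
  // ...sample body above...
}

main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});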
"""Object tracking in a local video."""
from google.cloud import videointelligence
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.Feature.OBJECT_TRACKING]
with io.open(path, "rb") as file:
input_content = file.read()
operation = video_client.annotate_video(
request={"features": features, "input_content": input_content}
)
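    # Note: for a video stored in Cloud Storage, pass "input_uri"
    # (e.g. "gs://bucket/video.mp4") in the request instead of
    # "input_content"; the API accepts one or the other.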
print("\nProcessing video for object annotations.")
result = operation.result(timeout=500)
print("\nFinished processing.\n")
# The first result is retrieved because a single video was processed.
object_annotations = result.annotation_results[0].object_annotations
# Get only the first annotation for demo purposes.
object_annotation = object_annotations[0]
print("Entity description: {}".format(object_annotation.entity.description))
if object_annotation.entity.entity_id:
print("Entity id: {}".format(object_annotation.entity.entity_id))
print(
"Segment: {}s to {}s".format(
object_annotation.segment.start_time_offset.seconds
+ object_annotation.segment.start_time_offset.microseconds / 1e6,
object_annotation.segment.end_time_offset.seconds
+ object_annotation.segment.end_time_offset.microseconds / 1e6,
)
)
print("Confidence: {}".format(object_annotation.confidence))
# Here we print only the bounding box of the first frame in this segment
frame = object_annotation.frames[0]
box = frame.normalized_bounding_box
print(
"Time offset of the first frame: {}s".format(
frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
)
)
print("Bounding box position:")
print("\tleft : {}".format(box.left))
print("\ttop : {}".format(box.top))
print("\tright : {}".format(box.right))
print("\tbottom: {}".format(box.bottom))
print("\n")