Label analysis

Label analysis detects labels in a video. Each label identifies an entity that appears in the video, along with a confidence score and the time offset at which it occurs.

The following Java, Node.js, and Python samples show how to use Video Intelligence streaming label detection to annotate a local video file. In each sample, the first request in the stream carries only the video configuration; every subsequent request carries a chunk of the file content (5 MB chunks are used here, and keeping chunks under 10 MB is recommended).

Java

import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.LabelAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.LabelFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;

class StreamingLabelDetection {

  // Perform streaming video label detection
  static void streamingLabelDetection(String filePath) {
    // String filePath = "path_to_your_video_file";

    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {

      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      StreamingLabelDetectionConfig labelConfig = StreamingLabelDetectionConfig.newBuilder()
          .setStationaryCamera(false)
          .build();

      StreamingVideoConfig streamingVideoConfig = StreamingVideoConfig.newBuilder()
          .setFeature(StreamingFeature.STREAMING_LABEL_DETECTION)
          .setLabelDetectionConfig(labelConfig)
          .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder()
              .setVideoConfig(streamingVideoConfig)
              .build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks
      for (int i = 0; i < numChunks; i++) {
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(ByteString.copyFrom(
                    Arrays.copyOfRange(
                        data, i * chunkSize, Math.min(data.length, (i + 1) * chunkSize))))
                .build());
      }

      // Tell the service you are done sending data
      call.closeSend();

      // Read responses as the service streams them back; each response
      // covers about one second of video.
      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();

        for (LabelAnnotation annotation : annotationResults.getLabelAnnotationsList()) {
          String entity = annotation.getEntity().getDescription();

          // There is only one frame per annotation
          LabelFrame labelFrame = annotation.getFrames(0);
          double offset = labelFrame.getTimeOffset().getSeconds()
              + labelFrame.getTimeOffset().getNanos() / 1e9;
          float confidence = labelFrame.getConfidence();

          System.out.format("%fs: %s (%f)\n", offset, entity, confidence);
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
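
To run the sample, pass the path of a local video file to streamingLabelDetection. The client streams the file in 5 MB chunks and prints each label with its time offset and confidence.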

Node.js

'use strict';

async function main(path = 'YOUR_LOCAL_FILE') {
  /**
   * TODO(developer): Uncomment these variables before running the sample.
   */
  // const path = 'Local file to analyze, e.g. ./my-file.mp4';
  const {
    StreamingVideoIntelligenceServiceClient,
  } = require('@google-cloud/video-intelligence').v1p3beta1;
  const fs = require('fs');

  // Instantiates a client
  const client = new StreamingVideoIntelligenceServiceClient();
  // Streaming configuration
  const configRequest = {
    videoConfig: {
      feature: 'STREAMING_LABEL_DETECTION',
    },
  };
  const readStream = fs.createReadStream(path, {
    highWaterMark: 5 * 1024 * 1024, // Chunk size set to 5 MB (recommended less than 10 MB)
    encoding: 'base64',
  });
  // Load file content
  const chunks = [];
  readStream
    .on('data', chunk => {
      const request = {
        inputContent: chunk.toString(),
      };
      chunks.push(request);
    })
    .on('close', () => {
      // configRequest should be the first in the stream of requests
      stream.write(configRequest);
      for (let i = 0; i < chunks.length; i++) {
        stream.write(chunks[i]);
      }
      stream.end();
    });

  // Open the bidirectional stream; the 'data' handler runs once per response.
  const stream = client.streamingAnnotateVideo().on('data', response => {
    // Gets annotations for the video
    const annotations = response.annotationResults;
    const labels = annotations.labelAnnotations;
    labels.forEach(label => {
      console.log(
        `Label ${label.entity.description} occurs at: ${label.frames[0]
          .timeOffset.seconds || 0}` +
          `.${(label.frames[0].timeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(` Confidence: ${label.frames[0].confidence}`);
    });
  });
}
main(...process.argv.slice(2)).catch(console.error);
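
You can run the sample by passing a local file path as the first argument, for example: node streaming-label-detection.js ./my-video.mp4 (the file names here are illustrative).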

Python

import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set streaming config.
config = videointelligence.types.StreamingVideoConfig(
    feature=(videointelligence.enums.
             StreamingFeature.STREAMING_LABEL_DETECTION))

# config_request should be the first in the stream of requests.
config_request = videointelligence.types.StreamingAnnotateVideoRequest(
    video_config=config)

# Set the chunk size to 5MB (recommended less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load file content.
stream = []
with io.open(path, 'rb') as video_file:
    while True:
        data = video_file.read(chunk_size)
        if not data:
            break
        stream.append(data)

def stream_generator():
    yield config_request
    for chunk in stream:
        yield videointelligence.types.StreamingAnnotateVideoRequest(
            input_content=chunk)

requests = stream_generator()

# streaming_annotate_video returns a generator.
responses = client.streaming_annotate_video(requests)

# Each response corresponds to about 1 second of video.
for response in responses:
    # Check for errors.
    if response.error.message:
        print(response.error.message)
        break

    # Get the time offset of the response. Skip responses that carry
    # no label annotations.
    label_annotations = response.annotation_results.label_annotations
    if not label_annotations:
        continue
    frame = label_annotations[0].frames[0]
    time_offset = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
    print('{}s:'.format(time_offset))

    for annotation in label_annotations:
        description = annotation.entity.description
        # Every annotation has only one frame
        confidence = annotation.frames[0].confidence
        # description is in Unicode
        print(u'\t{} (confidence: {})'.format(description, confidence))
