INPUT_URI: The Cloud Storage bucket that contains the file you want to annotate, including the file name. Must start with gs://. For example: "inputUri": "gs://cloud-samples-data/video/googlework_short.mp4"
PROJECT_NUMBER: The numeric identifier of your Google Cloud project
HTTP method and URL:
POST https://videointelligence.googleapis.com/v1/videos:annotate
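For reference, a minimal request body and curl invocation might look like the following sketch. The request.json file name is a placeholder, and the personDetectionConfig values shown are optional settings you can adjust or omit.

{
  "inputUri": "INPUT_URI",
  "features": ["PERSON_DETECTION"],
  "videoContext": {
    "personDetectionConfig": {
      "includeBoundingBoxes": true,
      "includePoseLandmarks": true,
      "includeAttributes": true
    }
  }
}

curl -X POST \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H "x-goog-user-project: PROJECT_NUMBER" \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @request.json \
  "https://videointelligence.googleapis.com/v1/videos:annotate"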
Java
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
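If you are developing locally, one common way to set up Application Default Credentials is through the gcloud CLI, sketched below; your environment may use service accounts or another method instead.

gcloud auth application-default login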
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.videointelligence.v1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1.AnnotateVideoResponse;
import com.google.cloud.videointelligence.v1.DetectedAttribute;
import com.google.cloud.videointelligence.v1.DetectedLandmark;
import com.google.cloud.videointelligence.v1.Feature;
import com.google.cloud.videointelligence.v1.PersonDetectionAnnotation;
import com.google.cloud.videointelligence.v1.PersonDetectionConfig;
import com.google.cloud.videointelligence.v1.TimestampedObject;
import com.google.cloud.videointelligence.v1.Track;
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1.VideoContext;
import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
import com.google.cloud.videointelligence.v1.VideoSegment;
public class DetectPersonGcs {

  public static void detectPersonGcs() throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String gcsUri = "gs://cloud-samples-data/video/googlework_short.mp4";
    detectPersonGcs(gcsUri);
  }

  // Detects people in a video stored in Google Cloud Storage using
  // the Cloud Video Intelligence API.
  public static void detectPersonGcs(String gcsUri) throws Exception {
    try (VideoIntelligenceServiceClient videoIntelligenceServiceClient =
        VideoIntelligenceServiceClient.create()) {
      PersonDetectionConfig personDetectionConfig =
          PersonDetectionConfig.newBuilder()
              // Must set includeBoundingBoxes to true to get poses and attributes.
              .setIncludeBoundingBoxes(true)
              .setIncludePoseLandmarks(true)
              .setIncludeAttributes(true)
              .build();
      VideoContext videoContext =
          VideoContext.newBuilder().setPersonDetectionConfig(personDetectionConfig).build();

      AnnotateVideoRequest request =
          AnnotateVideoRequest.newBuilder()
              .setInputUri(gcsUri)
              .addFeatures(Feature.PERSON_DETECTION)
              .setVideoContext(videoContext)
              .build();

      // Detects people in a video
      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
          videoIntelligenceServiceClient.annotateVideoAsync(request);

      System.out.println("Waiting for operation to complete...");
      AnnotateVideoResponse response = future.get();

      // Get the first response, since we sent only one video.
      VideoAnnotationResults annotationResult = response.getAnnotationResultsList().get(0);

      // Annotations for list of people detected, tracked and recognized in video.
      for (PersonDetectionAnnotation personDetectionAnnotation :
          annotationResult.getPersonDetectionAnnotationsList()) {
        System.out.print("Person detected:\n");
        for (Track track : personDetectionAnnotation.getTracksList()) {
          VideoSegment segment = track.getSegment();
          System.out.printf(
              "\tStart: %d.%.0fs\n",
              segment.getStartTimeOffset().getSeconds(),
              segment.getStartTimeOffset().getNanos() / 1e6);
          System.out.printf(
              "\tEnd: %d.%.0fs\n",
              segment.getEndTimeOffset().getSeconds(),
              segment.getEndTimeOffset().getNanos() / 1e6);

          // Each segment includes timestamped objects that include characteristics--e.g. clothes,
          // posture of the person detected.
          TimestampedObject firstTimestampedObject = track.getTimestampedObjects(0);

          // Attributes include unique pieces of clothing, poses (i.e., body landmarks)
          // of the person detected.
          for (DetectedAttribute attribute : firstTimestampedObject.getAttributesList()) {
            System.out.printf(
                "\tAttribute: %s; Value: %s\n", attribute.getName(), attribute.getValue());
          }

          // Landmarks in person detection include body parts.
          for (DetectedLandmark attribute : firstTimestampedObject.getLandmarksList()) {
            System.out.printf(
                "\tLandmark: %s; Vertex: %f, %f\n",
                attribute.getName(), attribute.getPoint().getX(), attribute.getPoint().getY());
          }
        }
      }
    }
  }
}
Node.js
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const gcsUri = 'GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4';

// Imports the Google Cloud Video Intelligence library
const Video = require('@google-cloud/video-intelligence').v1;

// Creates a client
const video = new Video.VideoIntelligenceServiceClient();

async function detectPersonGCS() {
  const request = {
    inputUri: gcsUri,
    features: ['PERSON_DETECTION'],
    videoContext: {
      personDetectionConfig: {
        // Must set includeBoundingBoxes to true to get poses and attributes.
        includeBoundingBoxes: true,
        includePoseLandmarks: true,
        includeAttributes: true,
      },
    },
  };

  // Detects people in a video
  // We get the first result because we only process 1 video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const personAnnotations =
    results[0].annotationResults[0].personDetectionAnnotations;

  for (const {tracks} of personAnnotations) {
    console.log('Person detected:');
    for (const {segment, timestampedObjects} of tracks) {
      console.log(
        `\tStart: ${segment.startTimeOffset.seconds}` +
          `.${(segment.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${segment.endTimeOffset.seconds}.` +
          `${(segment.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );

      // Each segment includes timestamped objects that
      // include characteristics--e.g. clothes, posture
      // of the person detected.
      const [firstTimestampedObject] = timestampedObjects;

      // Attributes include unique pieces of clothing, poses (i.e., body
      // landmarks) of the person detected.
      for (const {name, value} of firstTimestampedObject.attributes) {
        console.log(`\tAttribute: ${name}; Value: ${value}`);
      }

      // Landmarks in person detection include body parts.
      for (const {name, point} of firstTimestampedObject.landmarks) {
        console.log(`\tLandmark: ${name}; Vertex: ${point.x}, ${point.y}`);
      }
    }
  }
}

detectPersonGCS();
Python
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
from google.cloud import videointelligence_v1 as videointelligence


def detect_person(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4"):
    """Detects people in a video."""
    client = videointelligence.VideoIntelligenceServiceClient()

    # Configure the request
    config = videointelligence.types.PersonDetectionConfig(
        include_bounding_boxes=True,
        include_attributes=True,
        include_pose_landmarks=True,
    )
    context = videointelligence.types.VideoContext(person_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.PERSON_DETECTION],
            "input_uri": gcs_uri,
            "video_context": context,
        }
    )

    print("\nProcessing video for person detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]

    for annotation in annotation_result.person_detection_annotations:
        print("Person detected:")
        for track in annotation.tracks:
            print(
                "Segment: {}s to {}s".format(
                    track.segment.start_time_offset.seconds
                    + track.segment.start_time_offset.microseconds / 1e6,
                    track.segment.end_time_offset.seconds
                    + track.segment.end_time_offset.microseconds / 1e6,
                )
            )

            # Each segment includes timestamped objects that include
            # characteristics--e.g. clothes, posture of the person detected.
            # Grab the first timestamped object.
            timestamped_object = track.timestamped_objects[0]
            box = timestamped_object.normalized_bounding_box
            print("Bounding box:")
            print("\tleft : {}".format(box.left))
            print("\ttop : {}".format(box.top))
            print("\tright : {}".format(box.right))
            print("\tbottom: {}".format(box.bottom))

            # Attributes include unique pieces of clothing,
            # poses, or hair color.
            print("Attributes:")
            for attribute in timestamped_object.attributes:
                print(
                    "\t{}:{} {}".format(
                        attribute.name, attribute.value, attribute.confidence
                    )
                )

            # Landmarks in person detection include body parts such as
            # left_shoulder, right_ear, and right_ankle
            print("Landmarks:")
            for landmark in timestamped_object.landmarks:
                print(
                    "\t{}: {} (x={}, y={})".format(
                        landmark.name,
                        landmark.confidence,
                        landmark.point.x,  # Normalized vertex
                        landmark.point.y,  # Normalized vertex
                    )
                )
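The Python sample above only defines detect_person; a minimal, hypothetical invocation with your own bucket path might look like this:

detect_person(gcs_uri="gs://YOUR_BUCKET_ID/path/to/your/video.mp4")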
The following example uses person detection to find entities in a video file uploaded from your local machine.
REST
Send the processing request
To perform person detection on a local video file, base64-encode the contents of the video file. For information on how to base64-encode the contents of a video file, see Base64 Encoding. Then make a POST request to the videos:annotate method. Include the base64-encoded content in the request's inputContent field and specify the PERSON_DETECTION feature.
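As a rough sketch, on a Linux machine you could produce the encoded string with the base64 command (the file names here are placeholders; see the Base64 Encoding page for other platforms and options):

base64 resources/googlework_short.mp4 > video_base64.txt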
The following shows an example of a POST request using curl. The example uses the Google Cloud CLI to create an access token. For instructions on installing the gcloud CLI, see the Video Intelligence API quickstart.
Before using any of the request data, make the following replacements:
inputContent: The local video file in binary format
For example: 'AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAGVYW1vb3YAAABsbXZoZAAAAADWvhlR1r4ZUQABX5ABCOxo
AAEAAAEAAAAAAA4...'
PROJECT_NUMBER: The numeric identifier of your Google Cloud project
HTTP method and URL:
POST https://videointelligence.googleapis.com/v1/videos:annotate
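As in the Cloud Storage example, a minimal request body and curl invocation might look like the following sketch; request.json is a placeholder file that holds the base64-encoded content.

{
  "inputContent": "BASE64_ENCODED_CONTENT",
  "features": ["PERSON_DETECTION"]
}

curl -X POST \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H "x-goog-user-project: PROJECT_NUMBER" \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @request.json \
  "https://videointelligence.googleapis.com/v1/videos:annotate"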
Java
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.videointelligence.v1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1.AnnotateVideoResponse;
import com.google.cloud.videointelligence.v1.DetectedAttribute;
import com.google.cloud.videointelligence.v1.DetectedLandmark;
import com.google.cloud.videointelligence.v1.Feature;
import com.google.cloud.videointelligence.v1.PersonDetectionAnnotation;
import com.google.cloud.videointelligence.v1.PersonDetectionConfig;
import com.google.cloud.videointelligence.v1.TimestampedObject;
import com.google.cloud.videointelligence.v1.Track;
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1.VideoContext;
import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
import com.google.cloud.videointelligence.v1.VideoSegment;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
public class DetectPerson {

  public static void detectPerson() throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String localFilePath = "resources/googlework_short.mp4";
    detectPerson(localFilePath);
  }

  // Detects people in a video stored in a local file using the Cloud Video Intelligence API.
  public static void detectPerson(String localFilePath) throws Exception {
    try (VideoIntelligenceServiceClient videoIntelligenceServiceClient =
        VideoIntelligenceServiceClient.create()) {
      // Reads a local video file and converts it to base64.
      Path path = Paths.get(localFilePath);
      byte[] data = Files.readAllBytes(path);
      ByteString inputContent = ByteString.copyFrom(data);

      PersonDetectionConfig personDetectionConfig =
          PersonDetectionConfig.newBuilder()
              // Must set includeBoundingBoxes to true to get poses and attributes.
              .setIncludeBoundingBoxes(true)
              .setIncludePoseLandmarks(true)
              .setIncludeAttributes(true)
              .build();
      VideoContext videoContext =
          VideoContext.newBuilder().setPersonDetectionConfig(personDetectionConfig).build();

      AnnotateVideoRequest request =
          AnnotateVideoRequest.newBuilder()
              .setInputContent(inputContent)
              .addFeatures(Feature.PERSON_DETECTION)
              .setVideoContext(videoContext)
              .build();

      // Detects people in a video
      // We get the first result because only one video is processed.
      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
          videoIntelligenceServiceClient.annotateVideoAsync(request);

      System.out.println("Waiting for operation to complete...");
      AnnotateVideoResponse response = future.get();

      // Gets annotations for video
      VideoAnnotationResults annotationResult = response.getAnnotationResultsList().get(0);

      // Annotations for list of people detected, tracked and recognized in video.
      for (PersonDetectionAnnotation personDetectionAnnotation :
          annotationResult.getPersonDetectionAnnotationsList()) {
        System.out.print("Person detected:\n");
        for (Track track : personDetectionAnnotation.getTracksList()) {
          VideoSegment segment = track.getSegment();
          System.out.printf(
              "\tStart: %d.%.0fs\n",
              segment.getStartTimeOffset().getSeconds(),
              segment.getStartTimeOffset().getNanos() / 1e6);
          System.out.printf(
              "\tEnd: %d.%.0fs\n",
              segment.getEndTimeOffset().getSeconds(),
              segment.getEndTimeOffset().getNanos() / 1e6);

          // Each segment includes timestamped objects that include characteristics--e.g. clothes,
          // posture of the person detected.
          TimestampedObject firstTimestampedObject = track.getTimestampedObjects(0);

          // Attributes include unique pieces of clothing, poses (i.e., body landmarks)
          // of the person detected.
          for (DetectedAttribute attribute : firstTimestampedObject.getAttributesList()) {
            System.out.printf(
                "\tAttribute: %s; Value: %s\n", attribute.getName(), attribute.getValue());
          }

          // Landmarks in person detection include body parts.
          for (DetectedLandmark attribute : firstTimestampedObject.getLandmarksList()) {
            System.out.printf(
                "\tLandmark: %s; Vertex: %f, %f\n",
                attribute.getName(), attribute.getPoint().getX(), attribute.getPoint().getY());
          }
        }
      }
    }
  }
}
Node.js
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

// Imports the Google Cloud Video Intelligence library + Node's fs library
const Video = require('@google-cloud/video-intelligence').v1;
const fs = require('fs');

// Creates a client
const video = new Video.VideoIntelligenceServiceClient();

// Reads a local video file and converts it to base64
const file = fs.readFileSync(path);
const inputContent = file.toString('base64');

async function detectPerson() {
  const request = {
    inputContent: inputContent,
    features: ['PERSON_DETECTION'],
    videoContext: {
      personDetectionConfig: {
        // Must set includeBoundingBoxes to true to get poses and attributes.
        includeBoundingBoxes: true,
        includePoseLandmarks: true,
        includeAttributes: true,
      },
    },
  };

  // Detects people in a video
  // We get the first result because we only process 1 video
  const [operation] = await video.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for video
  const personAnnotations =
    results[0].annotationResults[0].personDetectionAnnotations;

  for (const {tracks} of personAnnotations) {
    console.log('Person detected:');
    for (const {segment, timestampedObjects} of tracks) {
      console.log(
        `\tStart: ${segment.startTimeOffset.seconds}` +
          `.${(segment.startTimeOffset.nanos / 1e6).toFixed(0)}s`
      );
      console.log(
        `\tEnd: ${segment.endTimeOffset.seconds}.` +
          `${(segment.endTimeOffset.nanos / 1e6).toFixed(0)}s`
      );

      // Each segment includes timestamped objects that
      // include characteristics--e.g. clothes, posture
      // of the person detected.
      const [firstTimestampedObject] = timestampedObjects;

      // Attributes include unique pieces of clothing, poses (i.e., body
      // landmarks) of the person detected.
      for (const {name, value} of firstTimestampedObject.attributes) {
        console.log(`\tAttribute: ${name}; Value: ${value}`);
      }

      // Landmarks in person detection include body parts.
      for (const {name, point} of firstTimestampedObject.landmarks) {
        console.log(`\tLandmark: ${name}; Vertex: ${point.x}, ${point.y}`);
      }
    }
  }
}

detectPerson();
Python
To authenticate to Video Intelligence, set up Application Default Credentials.
For more information, see Set up authentication for a local development environment.
import io

from google.cloud import videointelligence_v1 as videointelligence


def detect_person(local_file_path="path/to/your/video-file.mp4"):
    """Detects people in a video from a local file."""
    client = videointelligence.VideoIntelligenceServiceClient()

    with io.open(local_file_path, "rb") as f:
        input_content = f.read()

    # Configure the request
    config = videointelligence.types.PersonDetectionConfig(
        include_bounding_boxes=True,
        include_attributes=True,
        include_pose_landmarks=True,
    )
    context = videointelligence.types.VideoContext(person_detection_config=config)

    # Start the asynchronous request
    operation = client.annotate_video(
        request={
            "features": [videointelligence.Feature.PERSON_DETECTION],
            "input_content": input_content,
            "video_context": context,
        }
    )

    print("\nProcessing video for person detection annotations.")
    result = operation.result(timeout=300)

    print("\nFinished processing.\n")

    # Retrieve the first result, because a single video was processed.
    annotation_result = result.annotation_results[0]

    for annotation in annotation_result.person_detection_annotations:
        print("Person detected:")
        for track in annotation.tracks:
            print(
                "Segment: {}s to {}s".format(
                    track.segment.start_time_offset.seconds
                    + track.segment.start_time_offset.microseconds / 1e6,
                    track.segment.end_time_offset.seconds
                    + track.segment.end_time_offset.microseconds / 1e6,
                )
            )

            # Each segment includes timestamped objects that include
            # characteristics--e.g. clothes, posture of the person detected.
            # Grab the first timestamped object.
            timestamped_object = track.timestamped_objects[0]
            box = timestamped_object.normalized_bounding_box
            print("Bounding box:")
            print("\tleft : {}".format(box.left))
            print("\ttop : {}".format(box.top))
            print("\tright : {}".format(box.right))
            print("\tbottom: {}".format(box.bottom))

            # Attributes include unique pieces of clothing,
            # poses, or hair color.
            print("Attributes:")
            for attribute in timestamped_object.attributes:
                print(
                    "\t{}:{} {}".format(
                        attribute.name, attribute.value, attribute.confidence
                    )
                )

            # Landmarks in person detection include body parts such as
            # left_shoulder, right_ear, and right_ankle
            print("Landmarks:")
            for landmark in timestamped_object.landmarks:
                print(
                    "\t{}: {} (x={}, y={})".format(
                        landmark.name,
                        landmark.confidence,
                        landmark.point.x,  # Normalized vertex
                        landmark.point.y,  # Normalized vertex
                    )
                )
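As with the Cloud Storage sample, detect_person is only defined above; a hypothetical invocation with a local file path might look like this:

detect_person(local_file_path="path/to/your/video-file.mp4")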