The Video Intelligence API can identify entities shown in video footage using the LABEL_DETECTION feature. This feature identifies objects, locations, activities, animal species, products, and more.

The analysis can be compartmentalized as follows:

Frame level: Entities are identified and labeled within each frame, with sampling at one frame per second.
Shot level: Shots are automatically detected within every segment (or video). Entities are then identified and labeled within each shot.
Segment level: You can specify user-selected segments of a video for analysis by stipulating beginning and ending timestamps for the purposes of annotation (see VideoSegment). Entities are then identified and labeled within each segment. If you don't specify any segments, the whole video is treated as one segment. A minimal sketch of specifying a segment follows this list.
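For illustration, here is a minimal sketch of restricting analysis to one segment, assuming the Python client used in the samples below; the gs:// URI and the 0-30 second range are hypothetical choices, not values from this page:

import datetime

from google.cloud import videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()

# Hypothetical segment: analyze only the first 30 seconds of the video.
segment = videointelligence.VideoSegment(
    start_time_offset=datetime.timedelta(seconds=0),
    end_time_offset=datetime.timedelta(seconds=30),
)
context = videointelligence.VideoContext(segments=[segment])

path = "gs://my-bucket/my-video.mp4"  # hypothetical input URI
operation = video_client.annotate_video(
    request={
        "features": [videointelligence.Feature.LABEL_DETECTION],
        "input_uri": path,
        "video_context": context,
    }
)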
The following shows how to send a POST request to the videos:annotate method. You can configure LabelDetectionMode for shot-level and/or frame-level annotation; SHOT_AND_FRAME_MODE is recommended. The example uses the access token for a service account set up for the project with the Google Cloud CLI. For instructions on installing the Google Cloud CLI, setting up a project with a service account, and obtaining an access token, see the Video Intelligence quickstart.
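As a quick sketch of that mode configuration (Python, mirroring the video_context setup in the Cloud Storage sample later on this page):

from google.cloud import videointelligence

# Request both shot-level and frame-level labels (the recommended mode).
mode = videointelligence.LabelDetectionMode.SHOT_AND_FRAME_MODE
config = videointelligence.LabelDetectionConfig(label_detection_mode=mode)
context = videointelligence.VideoContext(label_detection_config=config)
# Pass this as "video_context" in the annotate_video request.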
Go

func label(w io.Writer, file string) error {
	ctx := context.Background()
	client, err := video.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("video.NewClient: %w", err)
	}
	defer client.Close()

	fileBytes, err := os.ReadFile(file)
	if err != nil {
		return err
	}

	op, err := client.AnnotateVideo(ctx, &videopb.AnnotateVideoRequest{
		Features: []videopb.Feature{
			videopb.Feature_LABEL_DETECTION,
		},
		InputContent: fileBytes,
	})
	if err != nil {
		return fmt.Errorf("AnnotateVideo: %w", err)
	}

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	printLabels := func(labels []*videopb.LabelAnnotation) {
		for _, label := range labels {
			fmt.Fprintf(w, "\tDescription: %s\n", label.Entity.Description)
			for _, category := range label.CategoryEntities {
				fmt.Fprintf(w, "\t\tCategory: %s\n", category.Description)
			}
			for _, segment := range label.Segments {
				start, _ := ptypes.Duration(segment.Segment.StartTimeOffset)
				end, _ := ptypes.Duration(segment.Segment.EndTimeOffset)
				fmt.Fprintf(w, "\t\tSegment: %s to %s\n", start, end)
			}
		}
	}

	// A single video was processed. Get the first result.
	result := resp.AnnotationResults[0]

	fmt.Fprintln(w, "SegmentLabelAnnotations:")
	printLabels(result.SegmentLabelAnnotations)
	fmt.Fprintln(w, "ShotLabelAnnotations:")
	printLabels(result.ShotLabelAnnotations)
	fmt.Fprintln(w, "FrameLabelAnnotations:")
	printLabels(result.FrameLabelAnnotations)
	return nil
}
Java
// Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
  // Read file and encode into Base64
  Path path = Paths.get(filePath);
  byte[] data = Files.readAllBytes(path);

  AnnotateVideoRequest request =
      AnnotateVideoRequest.newBuilder()
          .setInputContent(ByteString.copyFrom(data))
          .addFeatures(Feature.LABEL_DETECTION)
          .build();

  // Create an operation that will contain the response when the operation completes.
  OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
      client.annotateVideoAsync(request);

  System.out.println("Waiting for operation to complete...");
  for (VideoAnnotationResults results : response.get().getAnnotationResultsList()) {
    // process video / segment level label annotations
    System.out.println("Locations: ");
    for (LabelAnnotation labelAnnotation : results.getSegmentLabelAnnotationsList()) {
      System.out.println("Video label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Video label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }

    // process shot label annotations
    for (LabelAnnotation labelAnnotation : results.getShotLabelAnnotationsList()) {
      System.out.println("Shot label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Shot label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }

    // process frame label annotations
    for (LabelAnnotation labelAnnotation : results.getFrameLabelAnnotationsList()) {
      System.out.println("Frame label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Frame label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }
  }
}
Node.js
// Imports the Google Cloud Video Intelligence library + Node's fs library
const video = require('@google-cloud/video-intelligence').v1;
const fs = require('fs');
const util = require('util');

// Creates a client
const client = new video.VideoIntelligenceServiceClient();

/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

// Reads a local video file and converts it to base64
const readFile = util.promisify(fs.readFile);
const file = await readFile(path);
const inputContent = file.toString('base64');

// Constructs request
const request = {
  inputContent: inputContent,
  features: ['LABEL_DETECTION'],
};

// Detects labels in a video
const [operation] = await client.annotateVideo(request);
console.log('Waiting for operation to complete...');
const [operationResult] = await operation.promise();

// Gets annotations for video
const annotations = operationResult.annotationResults[0];

const labels = annotations.segmentLabelAnnotations;
labels.forEach(label => {
  console.log(`Label ${label.entity.description} occurs at:`);
  label.segments.forEach(segment => {
    const time = segment.segment;
    if (time.startTimeOffset.seconds === undefined) {
      time.startTimeOffset.seconds = 0;
    }
    if (time.startTimeOffset.nanos === undefined) {
      time.startTimeOffset.nanos = 0;
    }
    if (time.endTimeOffset.seconds === undefined) {
      time.endTimeOffset.seconds = 0;
    }
    if (time.endTimeOffset.nanos === undefined) {
      time.endTimeOffset.nanos = 0;
    }
    console.log(
      `\tStart: ${time.startTimeOffset.seconds}` +
        `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(
      `\tEnd: ${time.endTimeOffset.seconds}.` +
        `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(`\tConfidence: ${segment.confidence}`);
  });
});
"""Detect labels given a file path."""video_client=videointelligence.VideoIntelligenceServiceClient()features=[videointelligence.Feature.LABEL_DETECTION]withio.open(path,"rb")asmovie:input_content=movie.read()operation=video_client.annotate_video(request={"features":features,"input_content":input_content})print("\nProcessing video for label annotations:")result=operation.result(timeout=90)print("\nFinished processing.")# Process video/segment level label annotationssegment_labels=result.annotation_results[0].segment_label_annotationsfori,segment_labelinenumerate(segment_labels):print("Video label description: {}".format(segment_label.entity.description))forcategory_entityinsegment_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))fori,segmentinenumerate(segment_label.segments):start_time=(segment.segment.start_time_offset.seconds+segment.segment.start_time_offset.microseconds/1e6)end_time=(segment.segment.end_time_offset.seconds+segment.segment.end_time_offset.microseconds/1e6)positions="{}s to {}s".format(start_time,end_time)confidence=segment.confidenceprint("\tSegment {}: {}".format(i,positions))print("\tConfidence: {}".format(confidence))print("\n")# Process shot level label annotationsshot_labels=result.annotation_results[0].shot_label_annotationsfori,shot_labelinenumerate(shot_labels):print("Shot label description: {}".format(shot_label.entity.description))forcategory_entityinshot_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))fori,shotinenumerate(shot_label.segments):start_time=(shot.segment.start_time_offset.seconds+shot.segment.start_time_offset.microseconds/1e6)end_time=(shot.segment.end_time_offset.seconds+shot.segment.end_time_offset.microseconds/1e6)positions="{}s to {}s".format(start_time,end_time)confidence=shot.confidenceprint("\tSegment {}: {}".format(i,positions))print("\tConfidence: {}".format(confidence))print("\n")# Process frame level label annotationsframe_labels=result.annotation_results[0].frame_label_annotationsfori,frame_labelinenumerate(frame_labels):print("Frame label description: {}".format(frame_label.entity.description))forcategory_entityinframe_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))# Each frame_label_annotation has many frames,# here we print information only about the first frame.frame=frame_label.frames[0]time_offset=frame.time_offset.seconds+frame.time_offset.microseconds/1e6print("\tFirst frame time offset: {}s".format(time_offset))print("\tFirst frame confidence: {}".format(frame.confidence))print("\n")
The following shows how to send a POST request to the annotate method. The example uses the access token for a service account set up for the project with the Google Cloud CLI. For instructions on installing the Google Cloud CLI, setting up a project with a service account, and obtaining an access token, see the Video Intelligence quickstart.

Before using any of the request data, make the following replacements:

INPUT_URI: a Cloud Storage bucket that contains the file you want to annotate, including the file name. Must start with gs://.
PROJECT_NUMBER: the numeric identifier for your Google Cloud project.

HTTP method and URL:
POST https://videointelligence.googleapis.com/v1/videos:annotate
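Request JSON body (a minimal body assembled from the same AnnotateVideoRequest fields the client samples below use):

{
  "inputUri": "INPUT_URI",
  "features": ["LABEL_DETECTION"]
}

One way to send the request, assuming the body above is saved as request.json, is with curl, using the Google Cloud CLI to supply the access token:

curl -X POST \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @request.json \
  "https://videointelligence.googleapis.com/v1/videos:annotate"

The call returns a long-running operation whose name includes PROJECT_NUMBER; poll that operation to retrieve the annotation results.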
Go

func labelURI(w io.Writer, file string) error {
	ctx := context.Background()
	client, err := video.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("video.NewClient: %w", err)
	}
	defer client.Close()

	op, err := client.AnnotateVideo(ctx, &videopb.AnnotateVideoRequest{
		Features: []videopb.Feature{
			videopb.Feature_LABEL_DETECTION,
		},
		InputUri: file,
	})
	if err != nil {
		return fmt.Errorf("AnnotateVideo: %w", err)
	}

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	printLabels := func(labels []*videopb.LabelAnnotation) {
		for _, label := range labels {
			fmt.Fprintf(w, "\tDescription: %s\n", label.Entity.Description)
			for _, category := range label.CategoryEntities {
				fmt.Fprintf(w, "\t\tCategory: %s\n", category.Description)
			}
			for _, segment := range label.Segments {
				start, _ := ptypes.Duration(segment.Segment.StartTimeOffset)
				end, _ := ptypes.Duration(segment.Segment.EndTimeOffset)
				fmt.Fprintf(w, "\t\tSegment: %s to %s\n", start, end)
			}
		}
	}

	// A single video was processed. Get the first result.
	result := resp.AnnotationResults[0]

	fmt.Fprintln(w, "SegmentLabelAnnotations:")
	printLabels(result.SegmentLabelAnnotations)
	fmt.Fprintln(w, "ShotLabelAnnotations:")
	printLabels(result.ShotLabelAnnotations)
	fmt.Fprintln(w, "FrameLabelAnnotations:")
	printLabels(result.FrameLabelAnnotations)
	return nil
}
Java
// Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
  // Provide path to file hosted on GCS as "gs://bucket-name/..."
  AnnotateVideoRequest request =
      AnnotateVideoRequest.newBuilder()
          .setInputUri(gcsUri)
          .addFeatures(Feature.LABEL_DETECTION)
          .build();

  // Create an operation that will contain the response when the operation completes.
  OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
      client.annotateVideoAsync(request);

  System.out.println("Waiting for operation to complete...");
  for (VideoAnnotationResults results : response.get().getAnnotationResultsList()) {
    // process video / segment level label annotations
    System.out.println("Locations: ");
    for (LabelAnnotation labelAnnotation : results.getSegmentLabelAnnotationsList()) {
      System.out.println("Video label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Video label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }

    // process shot label annotations
    for (LabelAnnotation labelAnnotation : results.getShotLabelAnnotationsList()) {
      System.out.println("Shot label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Shot label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }

    // process frame label annotations
    for (LabelAnnotation labelAnnotation : results.getFrameLabelAnnotationsList()) {
      System.out.println("Frame label: " + labelAnnotation.getEntity().getDescription());
      // categories
      for (Entity categoryEntity : labelAnnotation.getCategoryEntitiesList()) {
        System.out.println("Frame label category: " + categoryEntity.getDescription());
      }
      // segments
      for (LabelSegment segment : labelAnnotation.getSegmentsList()) {
        double startTime =
            segment.getSegment().getStartTimeOffset().getSeconds()
                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
        double endTime =
            segment.getSegment().getEndTimeOffset().getSeconds()
                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
        System.out.printf("Segment location: %.3f:%.3f\n", startTime, endTime);
        System.out.println("Confidence: " + segment.getConfidence());
      }
    }
  }
}
Node.js
// Imports the Google Cloud Video Intelligence library
const video = require('@google-cloud/video-intelligence').v1;

// Creates a client
const client = new video.VideoIntelligenceServiceClient();

/**
 * TODO(developer): Uncomment the following line before running the sample.
 */
// const gcsUri = 'GCS URI of the video to analyze, e.g. gs://my-bucket/my-video.mp4';

const request = {
  inputUri: gcsUri,
  features: ['LABEL_DETECTION'],
};

// Detects labels in a video
const [operation] = await client.annotateVideo(request);
console.log('Waiting for operation to complete...');
const [operationResult] = await operation.promise();

// Gets annotations for video
const annotations = operationResult.annotationResults[0];

const labels = annotations.segmentLabelAnnotations;
labels.forEach(label => {
  console.log(`Label ${label.entity.description} occurs at:`);
  label.segments.forEach(segment => {
    const time = segment.segment;
    if (time.startTimeOffset.seconds === undefined) {
      time.startTimeOffset.seconds = 0;
    }
    if (time.startTimeOffset.nanos === undefined) {
      time.startTimeOffset.nanos = 0;
    }
    if (time.endTimeOffset.seconds === undefined) {
      time.endTimeOffset.seconds = 0;
    }
    if (time.endTimeOffset.nanos === undefined) {
      time.endTimeOffset.nanos = 0;
    }
    console.log(
      `\tStart: ${time.startTimeOffset.seconds}` +
        `.${(time.startTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(
      `\tEnd: ${time.endTimeOffset.seconds}.` +
        `${(time.endTimeOffset.nanos / 1e6).toFixed(0)}s`
    );
    console.log(`\tConfidence: ${segment.confidence}`);
  });
});
Python
"""Detects labels given a GCS path."""video_client=videointelligence.VideoIntelligenceServiceClient()features=[videointelligence.Feature.LABEL_DETECTION]mode=videointelligence.LabelDetectionMode.SHOT_AND_FRAME_MODEconfig=videointelligence.LabelDetectionConfig(label_detection_mode=mode)context=videointelligence.VideoContext(label_detection_config=config)operation=video_client.annotate_video(request={"features":features,"input_uri":path,"video_context":context})print("\nProcessing video for label annotations:")result=operation.result(timeout=180)print("\nFinished processing.")# Process video/segment level label annotationssegment_labels=result.annotation_results[0].segment_label_annotationsfori,segment_labelinenumerate(segment_labels):print("Video label description: {}".format(segment_label.entity.description))forcategory_entityinsegment_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))fori,segmentinenumerate(segment_label.segments):start_time=(segment.segment.start_time_offset.seconds+segment.segment.start_time_offset.microseconds/1e6)end_time=(segment.segment.end_time_offset.seconds+segment.segment.end_time_offset.microseconds/1e6)positions="{}s to {}s".format(start_time,end_time)confidence=segment.confidenceprint("\tSegment {}: {}".format(i,positions))print("\tConfidence: {}".format(confidence))print("\n")# Process shot level label annotationsshot_labels=result.annotation_results[0].shot_label_annotationsfori,shot_labelinenumerate(shot_labels):print("Shot label description: {}".format(shot_label.entity.description))forcategory_entityinshot_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))fori,shotinenumerate(shot_label.segments):start_time=(shot.segment.start_time_offset.seconds+shot.segment.start_time_offset.microseconds/1e6)end_time=(shot.segment.end_time_offset.seconds+shot.segment.end_time_offset.microseconds/1e6)positions="{}s to {}s".format(start_time,end_time)confidence=shot.confidenceprint("\tSegment {}: {}".format(i,positions))print("\tConfidence: {}".format(confidence))print("\n")# Process frame level label annotationsframe_labels=result.annotation_results[0].frame_label_annotationsfori,frame_labelinenumerate(frame_labels):print("Frame label description: {}".format(frame_label.entity.description))forcategory_entityinframe_label.category_entities:print("\tLabel category description: {}".format(category_entity.description))# Each frame_label_annotation has many frames,# here we print information only about the first frame.frame=frame_label.frames[0]time_offset=frame.time_offset.seconds+frame.time_offset.microseconds/1e6print("\tFirst frame time offset: {}s".format(time_offset))print("\tFirst frame confidence: {}".format(frame.confidence))print("\n")