转录本地存储的视频中的语音。
代码示例
Go
如需向 Video Intelligence 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅“为本地开发环境设置身份验证”。
// speechTranscription transcribes the speech in the locally stored video at
// path file and writes the transcript to w.
//
// The file is read fully into memory and sent inline with the request, using
// the "en-US" language code with automatic punctuation enabled. For every
// transcription alternative the transcript and its confidence are printed,
// followed by one line per word with its start/end offsets in seconds.
//
// It returns an error if the client cannot be created, the file cannot be
// read, the annotation request or its long-running operation fails, or the
// response contains no annotation results.
func speechTranscription(w io.Writer, file string) error {
	ctx := context.Background()
	client, err := video.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("video.NewClient: %w", err)
	}
	// Best-effort cleanup; a Close error is not actionable for the caller.
	defer client.Close()

	fileBytes, err := ioutil.ReadFile(file)
	if err != nil {
		return fmt.Errorf("reading video file %q: %w", file, err)
	}

	op, err := client.AnnotateVideo(ctx, &videopb.AnnotateVideoRequest{
		Features: []videopb.Feature{
			videopb.Feature_SPEECH_TRANSCRIPTION,
		},
		VideoContext: &videopb.VideoContext{
			SpeechTranscriptionConfig: &videopb.SpeechTranscriptionConfig{
				LanguageCode:               "en-US",
				EnableAutomaticPunctuation: true,
			},
		},
		InputContent: fileBytes,
	})
	if err != nil {
		return fmt.Errorf("AnnotateVideo: %w", err)
	}

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("waiting for annotation of %q: %w", file, err)
	}

	// A single video was processed, so only the first result is relevant.
	// Guard the index: an empty result list would otherwise panic.
	results := resp.GetAnnotationResults()
	if len(results) == 0 {
		return fmt.Errorf("no annotation results returned for %q", file)
	}
	result := results[0]

	for _, transcription := range result.SpeechTranscriptions {
		// The number of alternatives for each transcription is limited by
		// SpeechTranscriptionConfig.MaxAlternatives.
		// Each alternative is a different possible transcription
		// and has its own confidence score.
		for _, alternative := range transcription.GetAlternatives() {
			fmt.Fprintf(w, "Alternative level information:\n")
			fmt.Fprintf(w, "\tTranscript: %v\n", alternative.GetTranscript())
			fmt.Fprintf(w, "\tConfidence: %v\n", alternative.GetConfidence())

			fmt.Fprintf(w, "Word level information:\n")
			for _, wordInfo := range alternative.GetWords() {
				startTime := wordInfo.GetStartTime()
				endTime := wordInfo.GetEndTime()
				// Offsets arrive as seconds + nanoseconds; combine them into
				// fractional seconds for display.
				fmt.Fprintf(w, "\t%4.1f - %4.1f: %v (speaker %v)\n",
					float64(startTime.GetSeconds())+float64(startTime.GetNanos())*1e-9, // start as seconds
					float64(endTime.GetSeconds())+float64(endTime.GetNanos())*1e-9, // end as seconds
					wordInfo.GetWord(),
					wordInfo.GetSpeakerTag())
			}
		}
	}

	return nil
}
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例，请参阅 Google Cloud 示例浏览器。