使用转录模型转录 Cloud Storage 中的文件
使用集合让一切井井有条
根据您的偏好保存内容并对其进行分类。
使用经过训练的转录模型转录 Cloud Storage 中的音频文件。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
如未另行说明,那么本页面中的内容已根据知识共享署名 4.0 许可获得了许可,并且代码示例已根据 Apache 2.0 许可获得了许可。有关详情,请参阅 Google 开发者网站政策。Java 是 Oracle 和/或其关联公司的注册商标。
[[["易于理解","easyToUnderstand","thumb-up"],["解决了我的问题","solvedMyProblem","thumb-up"],["其他","otherUp","thumb-up"]],[["很难理解","hardToUnderstand","thumb-down"],["信息或示例代码不正确","incorrectInformationOrSampleCode","thumb-down"],["没有我需要的信息/示例","missingTheInformationSamplesINeed","thumb-down"],["翻译问题","translationIssue","thumb-down"],["其他","otherDown","thumb-down"]],[],[],[],null,["# Transcribe a file in Cloud Storage using a transcription model\n\nTranscribe an audio file in Cloud Storage using a trained transcription model.\n\nExplore further\n---------------\n\n\nFor detailed documentation that includes this code sample, see the following:\n\n- [Select a transcription model](/speech-to-text/docs/transcription-model)\n- [Transcribe audio from a video file using Speech-to-Text](/speech-to-text/docs/transcribe-audio-from-video-speech-to-text)\n\nCode sample\n-----------\n\n### Go\n\n\nTo learn how to install and use the client library for Speech-to-Text, see\n[Speech-to-Text client libraries](/speech-to-text/docs/client-libraries).\n\n\nFor more information, see the\n[Speech-to-Text Go API\nreference documentation](/go/docs/reference/cloud.google.com/go/speech/latest/apiv1).\n\n\nTo authenticate to Speech-to-Text, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n\n import (\n \t\"context\"\n \t\"fmt\"\n \t\"io\"\n \t\"strings\"\n\n \tspeech \"cloud.google.com/go/speech/apiv1\"\n \t\"cloud.google.com/go/speech/apiv1/speechpb\"\n )\n\n // transcribe_model_selection_gcs Transcribes the given audio file asynchronously with\n // the selected model.\n func transcribe_model_selection_gcs(w io.Writer) error {\n \tctx := context.Background()\n\n \tclient, err := speech.NewClient(ctx)\n \tif err != nil {\n \t\treturn 
fmt.Errorf(\"NewClient: %w\", err)\n \t}\n \tdefer client.Close()\n\n \taudio := &speechpb.RecognitionAudio{\n \t\tAudioSource: &speechpb.RecognitionAudio_Uri{Uri: \"gs://cloud-samples-tests/speech/Google_Gnome.wav\"},\n \t}\n\n \t// The speech recognition model to use\n \t// See, https://cloud.google.com/speech-to-text/docs/speech-to-text-requests#select-model\n \trecognitionConfig := &speechpb.RecognitionConfig{\n \t\tEncoding: speechpb.RecognitionConfig_LINEAR16,\n \t\tSampleRateHertz: 16000,\n \t\tLanguageCode: \"en-US\",\n \t\tModel: \"video\",\n \t}\n\n \tlongRunningRecognizeRequest := &speechpb.LongRunningRecognizeRequest{\n \t\tConfig: recognitionConfig,\n \t\tAudio: audio,\n \t}\n\n \toperation, err := client.LongRunningRecognize(ctx, longRunningRecognizeRequest)\n \tif err != nil {\n \t\treturn fmt.Errorf(\"error running recognize %w\", err)\n \t}\n\n \tresponse, err := operation.Wait(ctx)\n \tif err != nil {\n \t\treturn err\n \t}\n \tfor i, result := range response.Results {\n \t\talternative := result.Alternatives[0]\n \t\tfmt.Fprintf(w, \"%s\\n\", strings.Repeat(\"-\", 20))\n \t\tfmt.Fprintf(w, \"First alternative of result %d\", i)\n \t\tfmt.Fprintf(w, \"Transcript: %s\", alternative.Transcript)\n \t}\n \treturn nil\n }\n\n### Java\n\n\nTo learn how to install and use the client library for Speech-to-Text, see\n[Speech-to-Text client libraries](/speech-to-text/docs/client-libraries).\n\n\nFor more information, see 
the\n[Speech-to-Text Java API\nreference documentation](/java/docs/reference/google-cloud-speech/latest/overview).\n\n\nTo authenticate to Speech-to-Text, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n /**\n * Performs transcription of the remote audio file asynchronously with the selected model.\n *\n * @param gcsUri the path to the remote audio file to transcribe.\n */\n public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {\n try (SpeechClient speech = SpeechClient.create()) {\n\n // Configure request with video media type\n RecognitionConfig config =\n RecognitionConfig.newBuilder()\n // encoding may either be omitted or must match the value in the file header\n .setEncoding(AudioEncoding.LINEAR16)\n .setLanguageCode(\"en-US\")\n // sample rate hertz may be either be omitted or must match the value in the file\n // header\n .setSampleRateHertz(16000)\n .setModel(\"video\")\n .build();\n\n RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();\n\n // Use non-blocking call for getting file transcription\n OperationFuture\u003cLongRunningRecognizeResponse, LongRunningRecognizeMetadata\u003e response =\n speech.longRunningRecognizeAsync(config, audio);\n\n while (!response.isDone()) {\n System.out.println(\"Waiting for response...\");\n Thread.sleep(10000);\n }\n\n List\u003cSpeechRecognitionResult\u003e results = response.get().getResultsList();\n\n // Just print the first result here.\n SpeechRecognitionResult result = results.get(0);\n // There can be several alternative transcripts for a given chunk of speech. 
Just use the\n // first (most likely) one here.\n SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);\n System.out.printf(\"Transcript : %s\\n\", alternative.getTranscript());\n }\n }\n\n### Node.js\n\n\nTo learn how to install and use the client library for Speech-to-Text, see\n[Speech-to-Text client libraries](/speech-to-text/docs/client-libraries).\n\n\nFor more information, see the\n[Speech-to-Text Node.js API\nreference documentation](/nodejs/docs/reference/speech/latest).\n\n\nTo authenticate to Speech-to-Text, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n // Imports the Google Cloud client library for Beta API\n /**\n * TODO(developer): Update client library import to use new\n * version of API when desired features become available\n */\n const speech = require('@google-cloud/speech').v1p1beta1;\n\n // Creates a client\n const client = new speech.SpeechClient();\n\n /**\n * TODO(developer): Uncomment the following lines before running the sample.\n */\n // const gcsUri = 'gs://my-bucket/audio.raw';\n // const model = 'Model to use, e.g. phone_call, video, default';\n // const encoding = 'Encoding of the audio file, e.g. LINEAR16';\n // const sampleRateHertz = 16000;\n // const languageCode = 'BCP-47 language code, e.g. 
en-US';\n\n const config = {\n encoding: encoding,\n sampleRateHertz: sampleRateHertz,\n languageCode: languageCode,\n model: model,\n };\n const audio = {\n uri: gcsUri,\n };\n\n const request = {\n config: config,\n audio: audio,\n };\n\n // Detects speech in the audio file.\n const [response] = await client.recognize(request);\n const transcription = response.results\n .map(result =\u003e result.alternatives[0].transcript)\n .join('\\n');\n console.log('Transcription: ', transcription);\n\n### Python\n\n\nTo learn how to install and use the client library for Speech-to-Text, see\n[Speech-to-Text client libraries](/speech-to-text/docs/client-libraries).\n\n\nFor more information, see the\n[Speech-to-Text Python API\nreference documentation](/python/docs/reference/speech/latest).\n\n\nTo authenticate to Speech-to-Text, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n from google.cloud import speech\n\n client = speech.SpeechClient()\n\n audio = speech.RecognitionAudio(\n uri=\"gs://cloud-samples-tests/speech/Google_Gnome.wav\"\n )\n\n config = speech.RecognitionConfig(\n encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,\n sample_rate_hertz=16000,\n language_code=\"en-US\",\n model=\"video\", # Chosen model\n )\n\n operation = client.long_running_recognize(config=config, audio=audio)\n\n print(\"Waiting for operation to complete...\")\n response = operation.result(timeout=90)\n\n for i, result in enumerate(response.results):\n alternative = result.alternatives[0]\n 
print(\"-\" * 20)\n print(f\"First alternative of result {i}\")\n print(f\"Transcript: {alternative.transcript}\")\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=speech)."]]