Asynchrone Transkription für eine in Cloud Storage gespeicherte Audiodatei ausführen.
Weitere Informationen
Eine ausführliche Dokumentation, die dieses Codebeispiel enthält, finden Sie hier:
Codebeispiel
Go
func sendGCS(w io.Writer, client *speech.Client, gcsURI string) error {
ctx := context.Background()
// Send the contents of the audio file with the encoding and
// and sample rate information to be transcripted.
req := &speechpb.LongRunningRecognizeRequest{
Config: &speechpb.RecognitionConfig{
Encoding: speechpb.RecognitionConfig_LINEAR16,
SampleRateHertz: 16000,
LanguageCode: "en-US",
},
Audio: &speechpb.RecognitionAudio{
AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI},
},
}
op, err := client.LongRunningRecognize(ctx, req)
if err != nil {
return err
}
resp, err := op.Wait(ctx)
if err != nil {
return err
}
// Print the results.
for _, result := range resp.Results {
for _, alt := range result.Alternatives {
fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
}
}
return nil
}
Java
/**
* Performs non-blocking speech recognition on remote FLAC file and prints the transcription.
*
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
*/
public static void asyncRecognizeGcs(String gcsUri) throws Exception {
// Configure polling algorithm
SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder();
TimedRetryAlgorithm timedRetryAlgorithm =
OperationTimedPollAlgorithm.create(
RetrySettings.newBuilder()
.setInitialRetryDelay(Duration.ofMillis(500L))
.setRetryDelayMultiplier(1.5)
.setMaxRetryDelay(Duration.ofMillis(5000L))
.setInitialRpcTimeout(Duration.ZERO) // ignored
.setRpcTimeoutMultiplier(1.0) // ignored
.setMaxRpcTimeout(Duration.ZERO) // ignored
.setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours
.build());
speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm);
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
try (SpeechClient speech = SpeechClient.create(speechSettings.build())) {
// Configure remote file request for FLAC
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
// Use non-blocking call for getting file transcription
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(10000);
}
List<SpeechRecognitionResult> results = response.get().getResultsList();
for (SpeechRecognitionResult result : results) {
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcription: %s\n", alternative.getTranscript());
}
}
}
Node.js
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
// Creates a client
const client = new speech.SpeechClient();
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const gcsUri = 'gs://my-bucket/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
};
const audio = {
uri: gcsUri,
};
const request = {
config: config,
audio: audio,
};
// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
const [operation] = await client.longRunningRecognize(request);
// Get a Promise representation of the final result of the job
const [response] = await operation.promise();
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log(`Transcription: ${transcription}`);
PHP
use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
/**
* @param string $uri The Cloud Storage object to transcribe (gs://your-bucket-name/your-object-name)
*/
function transcribe_async_gcs(string $uri)
{
// change these variables if necessary
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 32000;
$languageCode = 'en-US';
// set string as audio content
$audio = (new RecognitionAudio())
->setUri($uri);
// set config
$config = (new RecognitionConfig())
->setEncoding($encoding)
->setSampleRateHertz($sampleRateHertz)
->setLanguageCode($languageCode);
// create the speech client
$client = new SpeechClient();
// create the asyncronous recognize operation
$operation = $client->longRunningRecognize($config, $audio);
$operation->pollUntilComplete();
if ($operation->operationSucceeded()) {
$response = $operation->getResult();
// each result is for a consecutive portion of the audio. iterate
// through them to get the transcripts for the entire audio file.
foreach ($response->getResults() as $result) {
$alternatives = $result->getAlternatives();
$mostLikely = $alternatives[0];
$transcript = $mostLikely->getTranscript();
$confidence = $mostLikely->getConfidence();
printf('Transcript: %s' . PHP_EOL, $transcript);
printf('Confidence: %s' . PHP_EOL, $confidence);
}
} else {
print_r($operation->getError());
}
$client->close();
}
Python
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
client = speech.SpeechClient()
audio = speech.RecognitionAudio(uri=gcs_uri)
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=16000,
language_code="en-US",
)
operation = client.long_running_recognize(config=config, audio=audio)
print("Waiting for operation to complete...")
response = operation.result(timeout=90)
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u"Transcript: {}".format(result.alternatives[0].transcript))
print("Confidence: {}".format(result.alternatives[0].confidence))
Ruby
# storage_path = "Path to file in Cloud Storage, eg. gs://bucket/audio.raw"
require "google/cloud/speech"
speech = Google::Cloud::Speech.speech
config = { encoding: :LINEAR16,
sample_rate_hertz: 16_000,
language_code: "en-US" }
audio = { uri: storage_path }
operation = speech.long_running_recognize config: config, audio: audio
puts "Operation started"
operation.wait_until_done!
raise operation.results.message if operation.error?
results = operation.response.results
alternatives = results.first.alternatives
alternatives.each do |alternative|
puts "Transcription: #{alternative.transcript}"
end
Nächste Schritte
Informationen zum Suchen und Filtern von Codebeispielen für andere Google Cloud-Produkte finden Sie im Google Cloud-Beispielbrowser.