Eseguire una trascrizione sincrona su un file audio archiviato in Cloud Storage
Per saperne di più
Per una documentazione dettagliata che includa questo esempio di codice, consulta quanto segue:
Esempio di codice
Go
func recognizeGCS(w io.Writer, gcsURI string) error {
ctx := context.Background()
client, err := speech.NewClient(ctx)
if err != nil {
return err
}
defer client.Close()
// Send the request with the URI (gs://...)
// and sample rate information to be transcripted.
resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
Config: &speechpb.RecognitionConfig{
Encoding: speechpb.RecognitionConfig_LINEAR16,
SampleRateHertz: 16000,
LanguageCode: "en-US",
},
Audio: &speechpb.RecognitionAudio{
AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI},
},
})
// Print the results.
for _, result := range resp.Results {
for _, alt := range result.Alternatives {
fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
}
}
return nil
}
Java
/**
* Performs speech recognition on remote FLAC file and prints the transcription.
*
* @param gcsUri the path to the remote FLAC audio file to transcribe.
*/
public static void syncRecognizeGcs(String gcsUri) throws Exception {
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
try (SpeechClient speech = SpeechClient.create()) {
// Builds the request for remote FLAC file
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
// Use blocking call for getting audio transcript
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();
for (SpeechRecognitionResult result : results) {
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcription: %s%n", alternative.getTranscript());
}
}
}
Node.js
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
// Creates a client
const client = new speech.SpeechClient();
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const gcsUri = 'gs://my-bucket/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
};
const audio = {
uri: gcsUri,
};
const request = {
config: config,
audio: audio,
};
// Detects speech in the audio file
const [response] = await client.recognize(request);
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log('Transcription: ', transcription);
PHP
use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
/**
* @param string $uri The Cloud Storage object to transcribe (gs://your-bucket-name/your-object-name)
*/
function transcribe_sync_gcs(string $uri)
{
// change these variables if necessary
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 32000;
$languageCode = 'en-US';
// set string as audio content
$audio = (new RecognitionAudio())
->setUri($uri);
// set config
$config = (new RecognitionConfig())
->setEncoding($encoding)
->setSampleRateHertz($sampleRateHertz)
->setLanguageCode($languageCode);
// create the speech client
$client = new SpeechClient();
try {
$response = $client->recognize($config, $audio);
foreach ($response->getResults() as $result) {
$alternatives = $result->getAlternatives();
$mostLikely = $alternatives[0];
$transcript = $mostLikely->getTranscript();
$confidence = $mostLikely->getConfidence();
printf('Transcript: %s' . PHP_EOL, $transcript);
printf('Confidence: %s' . PHP_EOL, $confidence);
}
} finally {
$client->close();
}
}
Python
def transcribe_gcs(gcs_uri):
"""Transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
client = speech.SpeechClient()
audio = speech.RecognitionAudio(uri=gcs_uri)
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=16000,
language_code="en-US",
)
response = client.recognize(config=config, audio=audio)
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in response.results:
# The first alternative is the most likely one for this portion.
print(u"Transcript: {}".format(result.alternatives[0].transcript))
Ruby
# storage_path = "Path to file in Cloud Storage, eg. gs://bucket/audio.raw"
require "google/cloud/speech"
speech = Google::Cloud::Speech.speech
config = { encoding: :LINEAR16,
sample_rate_hertz: 16_000,
language_code: "en-US" }
audio = { uri: storage_path }
response = speech.recognize config: config, audio: audio
results = response.results
alternatives = results.first.alternatives
alternatives.each do |alternative|
puts "Transcription: #{alternative.transcript}"
end
Passaggi successivi
Per cercare e filtrare esempi di codice per altri prodotti Google Cloud, consulta la pagina Browser di esempio Google Cloud.