Trascrivi un file audio locale utilizzando un modello di trascrizione addestrato.
Per saperne di più
Per una documentazione dettagliata che includa questo esempio di codice, consulta quanto segue:
Esempio di codice
Go
// modelSelection transcribes the local audio file at path with the "video"
// recognition model and writes every alternative of every result to w.
//
// The request is configured for LINEAR16 audio sampled at 16000 Hz in en-US.
// An error is returned if the client cannot be created, the file cannot be
// read, or the Recognize call fails; the underlying error is wrapped so that
// callers can inspect it with errors.Is / errors.As.
func modelSelection(w io.Writer, path string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	// path = "../testdata/Google_Gnome.wav"
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return fmt.Errorf("ReadFile: %w", err)
	}

	req := &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
			Model:           "video",
		},
		// The audio bytes are sent inline with the request.
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
		},
	}

	resp, err := client.Recognize(ctx, req)
	if err != nil {
		return fmt.Errorf("Recognize: %w", err)
	}

	// Results and alternatives are reported with 1-based numbering.
	for i, result := range resp.Results {
		fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
		fmt.Fprintf(w, "Result %d\n", i+1)
		for j, alternative := range result.Alternatives {
			fmt.Fprintf(w, "Alternative %d: %s\n", j+1, alternative.Transcript)
		}
	}
	return nil
}
Java
/**
 * Performs transcription of the given audio file synchronously with the selected model.
 *
 * <p>The request uses LINEAR16 encoding, a 16000 Hz sample rate, the "en-US" language code and
 * the "video" model. Only the first alternative of the first result is printed. If the response
 * contains no results (or the first result has no alternatives), a short notice is printed
 * instead of failing with an {@code IndexOutOfBoundsException}.
 *
 * @param fileName the path to an audio file to transcribe
 * @throws Exception if the file cannot be read or the recognition request fails
 */
public static void transcribeModelSelection(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with video media type
    RecognitionConfig recConfig =
        RecognitionConfig.newBuilder()
            // encoding may either be omitted or must match the value in the file header
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            // sample rate hertz may either be omitted or must match the value in the file
            // header
            .setSampleRateHertz(16000)
            .setModel("video")
            .build();

    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

    RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);

    // The result list may be empty; guard before indexing into it.
    if (recognizeResponse.getResultsCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
    if (result.getAlternativesCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
Node.js
// Imports the Google Cloud client library for Beta API
/**
* TODO(developer): Update client library import to use new
* version of API when desired features become available
*/
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');
// Instantiates the Speech client from the imported API surface
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const model = 'Model to use, e.g. phone_call, video, default';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

// Recognition settings, taken verbatim from the variables above
const config = {encoding, sampleRateHertz, languageCode, model};

// The audio is sent inline: the file is read synchronously and base64-encoded
const audioBytes = fs.readFileSync(filename);
const audio = {content: audioBytes.toString('base64')};

const request = {config, audio};

// Sends the recognition request and keeps the first element of the returned array.
// NOTE: `await` here needs an enclosing async function (or an ES module context).
const [response] = await client.recognize(request);

// Keep only the first alternative of each result, one transcript per line
const firstTranscripts = response.results.map(
  ({alternatives}) => alternatives[0].transcript
);
const transcription = firstTranscripts.join('\n');
console.log('Transcription: ', transcription);
PHP
use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
/**
 * Transcribes a local audio file synchronously with the selected model and
 * prints the most likely transcript and its confidence for each result.
 *
 * The speech client is always closed, even when the API call throws.
 *
 * @param string $audioFile path to an audio file
 * @param string $model     name of the recognition model to use, e.g. "video"
 */
function transcribe_model_selection(string $audioFile, string $model)
{
    // change these variables if necessary
    $encoding = AudioEncoding::LINEAR16;
    $sampleRateHertz = 32000;
    $languageCode = 'en-US';

    // get contents of a file into a string
    $content = file_get_contents($audioFile);

    // set string as audio content
    $audio = (new RecognitionAudio())
        ->setContent($content);

    // set config
    $config = (new RecognitionConfig())
        ->setEncoding($encoding)
        ->setSampleRateHertz($sampleRateHertz)
        ->setLanguageCode($languageCode)
        ->setModel($model);

    // create the speech client
    $client = new SpeechClient();

    try {
        // make the API call
        $response = $client->recognize($config, $audio);
        $results = $response->getResults();

        // print results
        foreach ($results as $result) {
            $alternatives = $result->getAlternatives();

            // a result without alternatives has nothing to print
            if (count($alternatives) === 0) {
                continue;
            }

            $mostLikely = $alternatives[0];
            $transcript = $mostLikely->getTranscript();
            $confidence = $mostLikely->getConfidence();
            printf('Transcript: %s' . PHP_EOL, $transcript);
            printf('Confidence: %s' . PHP_EOL, $confidence);
        }
    } finally {
        // release the client whether or not the request succeeded
        $client->close();
    }
}
Python
def transcribe_model_selection(speech_file, model):
    """Transcribe the given audio file synchronously with the selected model.

    The file is read as raw bytes and sent inline in a single ``recognize``
    request configured for LINEAR16 audio at 16000 Hz in ``en-US``.

    Args:
        speech_file: Path to the local audio file to transcribe.
        model: Name of the recognition model, passed through unchanged to
            ``RecognitionConfig``.

    Returns:
        None. For every result, a separator line, the zero-based result index
        and the transcript of its first alternative are printed to stdout.
    """
    from google.cloud import speech

    client = speech.SpeechClient()

    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
        model=model,
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print("Transcript: {}".format(alternative.transcript))
Ruby
# file_path = "path/to/audio.wav"
# NOTE(review): `model` is used below but not defined in this snippet;
# presumably a model name such as "video" - confirm against the caller.

require "google/cloud/speech"

client = Google::Cloud::Speech.speech

# Recognition settings: LINEAR16 audio at 16 kHz, US English, caller-chosen model.
recognition_config = {
  encoding:          :LINEAR16,
  sample_rate_hertz: 16_000,
  language_code:     "en-US",
  model:             model
}

# The audio is sent inline as the file's raw bytes.
audio_payload = { content: File.binread(file_path) }

# Start the long-running recognition and block until it completes.
operation = client.long_running_recognize(config: recognition_config, audio: audio_payload)
puts "Operation started"

operation.wait_until_done!

raise operation.results.message if operation.error?

# Report the first alternative of every result, with a zero-based index.
operation.response.results.each_with_index do |result, index|
  best = result.alternatives.first

  puts "-" * 20
  puts "First alternative of result #{index}"
  puts "Transcript: #{best.transcript}"
end
Passaggi successivi
Per cercare e filtrare esempi di codice relativi ad altri prodotti Google Cloud, consulta il browser di esempi di Google Cloud.