Trascrivi un file audio locale, inclusa la punteggiatura automatica.
Per saperne di più
Per una documentazione dettagliata che includa questo esempio di codice, consulta quanto segue:
Esempio di codice
Go
// autoPunctuation transcribes the local audio file at path with automatic
// punctuation enabled and writes every alternative transcript of every
// result to w.
//
// The request is configured for LINEAR16 audio at 8000 Hz in en-US.
// Failures from client creation, file reading, or recognition are returned
// wrapped with %w, so callers can inspect the cause with errors.Is/errors.As.
func autoPunctuation(w io.Writer, path string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	// path = "../testdata/commercial_mono.wav"
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return fmt.Errorf("ReadFile: %w", err)
	}

	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 8000,
			LanguageCode:    "en-US",
			// Enable automatic punctuation.
			EnableAutomaticPunctuation: true,
		},
		// The audio bytes are sent inline in the request body.
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
		},
	})
	if err != nil {
		return fmt.Errorf("Recognize: %w", err)
	}

	// Print a separator and a 1-based header for each result, followed by
	// all of its alternatives (also numbered from 1).
	for i, result := range resp.Results {
		fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
		fmt.Fprintf(w, "Result %d\n", i+1)
		for j, alternative := range result.Alternatives {
			fmt.Fprintf(w, "Alternative %d: %s\n", j+1, alternative.Transcript)
		}
	}
	return nil
}
Java
/**
 * Performs transcription with automatic punctuation on a remote FLAC file and prints the
 * transcription.
 *
 * <p>Only the first alternative of the first result is printed. If the response contains no
 * results, or the first result has no alternatives, a short notice is printed instead.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 * @throws Exception if the client cannot be created or the recognition operation fails.
 */
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request for FLAC audio at 16000 Hz with automatic punctuation enabled
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.FLAC)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .setEnableAutomaticPunctuation(true)
            .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    // Poll every 10 seconds until the long-running operation has finished.
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    // The result list may be empty; guard before indexing so we do not throw
    // IndexOutOfBoundsException.
    LongRunningRecognizeResponse recognizeResponse = response.get();
    if (recognizeResponse.getResultsCount() == 0) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
    if (result.getAlternativesCount() == 0) {
      System.out.println("No transcription alternatives returned.");
      return;
    }

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
Node.js
// Imports the Google Cloud client library for API
/**
* TODO(developer): Update client library import to use new
* version of API when desired features become available
*/
const speech = require('@google-cloud/speech');
const fs = require('fs');
// Instantiate the Speech-to-Text client used for the request below.
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 * Include the sampleRateHertz field in the config object.
 */
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

// Recognition settings; automatic punctuation is switched on.
const config = {
  encoding,
  languageCode,
  enableAutomaticPunctuation: true,
};

// Read the local file synchronously and embed it in the request as base64.
const audio = {content: fs.readFileSync(filename).toString('base64')};

const request = {config, audio};

// Send the recognition request and wait for the response.
const [response] = await client.recognize(request);

// Take the first alternative of each result, one transcript per line.
const transcripts = [];
for (const result of response.results) {
  transcripts.push(result.alternatives[0].transcript);
}
const transcription = transcripts.join('\n');
console.log('Transcription: ', transcription);
PHP
use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
/**
 * Transcribes a local audio file with automatic punctuation enabled and
 * prints the transcript and confidence of the first alternative of each
 * result.
 *
 * The speech client is always closed, even when the API call or the result
 * handling throws.
 *
 * @param string $audioFile path to an audio file
 */
function transcribe_auto_punctuation(string $audioFile)
{
    // change these variables if necessary
    $encoding = AudioEncoding::LINEAR16;
    $sampleRateHertz = 32000;
    $languageCode = 'en-US';

    // get contents of a file into a string
    $content = file_get_contents($audioFile);

    // set string as audio content
    $audio = (new RecognitionAudio())
        ->setContent($content);

    // set config
    $config = (new RecognitionConfig())
        ->setEncoding($encoding)
        ->setSampleRateHertz($sampleRateHertz)
        ->setLanguageCode($languageCode)
        ->setEnableAutomaticPunctuation(true);

    // create the speech client
    $client = new SpeechClient();

    try {
        // make the API call
        $response = $client->recognize($config, $audio);
        $results = $response->getResults();

        // print results
        foreach ($results as $result) {
            $alternatives = $result->getAlternatives();
            // only the first alternative of each result is reported
            $mostLikely = $alternatives[0];
            $transcript = $mostLikely->getTranscript();
            $confidence = $mostLikely->getConfidence();
            printf('Transcript: %s' . PHP_EOL, $transcript);
            printf('Confidence: %s' . PHP_EOL, $confidence);
        }
    } finally {
        // release the client even if recognize() or printing failed
        $client->close();
    }
}
Python
from google.cloud import speech
# Create the Speech-to-Text client.
client = speech.SpeechClient()

# path = 'resources/commercial_mono.wav'
# Read the whole audio file as bytes. The built-in open() is used because this
# snippet never imports the io module; in Python 3, io.open is the same function.
with open(path, "rb") as audio_file:
    content = audio_file.read()

# The audio bytes are sent inline with the request.
audio = speech.RecognitionAudio(content=content)
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=8000,
    language_code="en-US",
    # Enable automatic punctuation
    enable_automatic_punctuation=True,
)

response = client.recognize(config=config, audio=audio)

# Print the first alternative of every result; results are numbered from 0.
for i, result in enumerate(response.results):
    alternative = result.alternatives[0]
    print("-" * 20)
    print("First alternative of result {}".format(i))
    print("Transcript: {}".format(alternative.transcript))
Ruby
# audio_file_path = "path/to/audio.wav"
require "google/cloud/speech"
# Build the Speech client.
speech = Google::Cloud::Speech.speech

# Recognition settings, with automatic punctuation turned on.
config = {
  encoding:                     :LINEAR16,
  sample_rate_hertz:            8000,
  language_code:                "en-US",
  enable_automatic_punctuation: true
}

# Read the file in binary mode and send its bytes inline with the request.
audio = { content: File.binread(audio_file_path) }

# Start the long-running recognition and block until it has finished.
operation = speech.long_running_recognize(config: config, audio: audio)
puts "Operation started"
operation.wait_until_done!

# Surface a failed operation as an exception carrying its message.
if operation.error?
  raise operation.results.message
end

# Print the first alternative of each result; results are numbered from 0.
operation.response.results.each_with_index do |entry, index|
  best = entry.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{index}"
  puts "Transcript: #{best.transcript}"
end
Passaggi successivi
Per cercare e filtrare esempi di codice per altri prodotti Google Cloud, consulta la pagina Browser di esempio Google Cloud.