Trascrivi un file audio locale, includendo i metadati di riconoscimento nella richiesta.
Esempio di codice
/**
 * Transcribe the given audio file and include recognition metadata in the request.
 *
 * @param fileName the path to an audio file.
 */
public static void transcribeFileWithMetadata(String fileName) throws Exception {
Path path = Paths.get(fileName);
byte[] content = Files.readAllBytes(path);
try (SpeechClient speechClient = SpeechClient.create()) {
// Get the contents of the local audio file
RecognitionAudio recognitionAudio =
// Construct a recognition metadata object.
// Most metadata fields are specified as enums that can be found
// in speech.enums.RecognitionMetadata
RecognitionMetadata metadata =
.setRecordingDeviceName("Pixel 2 XL") // Some metadata fields are free form strings
// And some are integers, for instance the 6 digit NAICS code
// Configure request to enable enhanced models
RecognitionConfig config =
.setMetadata(metadata) // Add the metadata to the config
// Perform the transcription request
RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
// Print out the results
for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
System.out.format("Transcript: %s\n\n", alternative.getTranscript());
// Imports the Google Cloud client library for Beta API
/**
 * TODO(developer): Update client library import to use new
 * version of API when desired features become available
 */
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');
// Creates a client
const client = new speech.SpeechClient();
async function syncRecognizeWithMetaData() {
/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';
const recognitionMetadata = {
interactionType: 'DISCUSSION',
microphoneDistance: 'NEARFIELD',
recordingDeviceType: 'SMARTPHONE',
recordingDeviceName: 'Pixel 2 XL',
industryNaicsCodeOfAudio: 519190,
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
metadata: recognitionMetadata,
const audio = {
content: fs.readFileSync(filename).toString('base64'),
const request = {
config: config,
audio: audio,
// Detects speech in the audio file
const [response] = await client.recognize(request);
response.results.forEach(result => {
const alternative = result.alternatives[0];
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()
speech_file = "resources/commercial_mono.wav"
with open(speech_file, "rb") as audio_file:
    content = audio_file.read()
# Here we construct a recognition metadata object.
# Most metadata fields are specified as enums that can be found
# in speech.enums.RecognitionMetadata
metadata = speech.RecognitionMetadata()
metadata.interaction_type = speech.RecognitionMetadata.InteractionType.DISCUSSION
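metadata.microphone_distance = (
    speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD
)
metadata.recording_device_type = (
    speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE
)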
# Some metadata fields are free form strings
metadata.recording_device_name = "Pixel 2 XL"
# And some are integers, for instance the 6 digit NAICS code
metadata.industry_naics_code_of_audio = 519190
audio = speech.RecognitionAudio(content=content)
config = speech.RecognitionConfig(
# Add this in the request to send metadata.
response = client.recognize(config=config, audio=audio)
for i, result in enumerate(response.results):
alternative = result.alternatives[0]
print("-" * 20)
print(u"First alternative of result {}".format(i))
print(u"Transcript: {}".format(alternative.transcript))
Passaggi successivi
Per cercare e filtrare esempi di codice relativi ad altri prodotti Google Cloud, consulta il browser di esempi di Google Cloud.