// Imports the Google Cloud client library
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Performs transcription of the given audio file synchronously with the selected model.
 *
 * @param fileName the path to an audio file to transcribe
 */
public static void transcribeModelSelection(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speech = SpeechClient.create()) {
    // Configure the request to use the "video" model
    RecognitionConfig recConfig =
        RecognitionConfig.newBuilder()
            // The encoding may be omitted; if set, it must match the value in the file header
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            // The sample rate may be omitted; if set, it must match the value in the file header
            .setSampleRateHertz(16000)
            .setModel("video")
            .build();

    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
    RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
    System.out.printf("Transcript: %s%n", alternative.getTranscript());
  }
}
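
For reference, a minimal sketch of invoking the method above. The main method and the audio path are placeholders, not part of the original sample; they assume the method lives in the same class and that the file contains LINEAR16 audio at 16,000 Hz.

// Hypothetical entry point; assumes it sits in the same class as transcribeModelSelection.
public static void main(String[] args) throws Exception {
  // Replace the placeholder path with a real LINEAR16 audio file.
  transcribeModelSelection("/path/to/audio.raw");
}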
// Imports the Google Cloud client library for Beta API
/**
* TODO(developer): Update client library import to use new
* version of API when desired features become available
*/
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');
// Creates a client
const client = new speech.SpeechClient();
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const model = 'Model to use, e.g. phone_call, video, default';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';
// `await` is only valid inside an async function, so the request logic runs in one.
async function transcribeModelSelection() {
  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    model: model,
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };
  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log('Transcription: ', transcription);
}

transcribeModelSelection();
use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;
/** Uncomment and populate these variables in your code */
// $audioFile = 'path to an audio file';
// $model = 'video';
// change these variables if necessary
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 32000;
$languageCode = 'en-US';
// get contents of a file into a string
$content = file_get_contents($audioFile);
// set string as audio content
$audio = (new RecognitionAudio())
    ->setContent($content);

// set config
$config = (new RecognitionConfig())
    ->setEncoding($encoding)
    ->setSampleRateHertz($sampleRateHertz)
    ->setLanguageCode($languageCode)
    ->setModel($model);
// create the speech client
$client = new SpeechClient();
// make the API call
$response = $client->recognize($config, $audio);
$results = $response->getResults();
// print results
foreach ($results as $result) {
    $alternatives = $result->getAlternatives();
    $mostLikely = $alternatives[0];
    $transcript = $mostLikely->getTranscript();
    $confidence = $mostLikely->getConfidence();
    printf('Transcript: %s' . PHP_EOL, $transcript);
    printf('Confidence: %s' . PHP_EOL, $confidence);
}
$client->close();
def transcribe_model_selection(speech_file, model):
    """Transcribe the given audio file synchronously with the selected model."""
    from google.cloud import speech

    client = speech.SpeechClient()

    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
        model=model,
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print("Transcript: {}".format(alternative.transcript))