Transcribe a local file using a transcription model

Stay organized with collections Save and categorize content based on your preferences.

Transcribe a local audio file using a trained transcription model.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample


func modelSelection(w io.Writer, path string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	defer client.Close()

	// path = "../testdata/Google_Gnome.wav"
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return fmt.Errorf("ReadFile: %v", err)

	req := &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
			Model:           "video",
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},

	resp, err := client.Recognize(ctx, req)
	if err != nil {
		return fmt.Errorf("Recognize: %v", err)

	for i, result := range resp.Results {
		fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
		fmt.Fprintf(w, "Result %d\n", i+1)
		for j, alternative := range result.Alternatives {
			fmt.Fprintf(w, "Alternative %d: %s\n", j+1, alternative.Transcript)
	return nil


 * Performs transcription of the given audio file synchronously with the selected model.
 * @param fileName the path to a audio file to transcribe
public static void transcribeModelSelection(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with video media type
    RecognitionConfig recConfig =
            // encoding may either be omitted or must match the value in the file header
            // sample rate hertz may be either be omitted or must match the value in the file
            // header

    RecognitionAudio recognitionAudio =

    RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
    System.out.printf("Transcript : %s\n", alternative.getTranscript());


// Imports the Google Cloud client library for Beta API
 * TODO(developer): Update client library import to use new
 * version of API when desired features become available
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

// Creates a client
const client = new speech.SpeechClient();

 * TODO(developer): Uncomment the following lines before running the sample.
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const model = 'Model to use, e.g. phone_call, video, default';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

const config = {
  encoding: encoding,
  sampleRateHertz: sampleRateHertz,
  languageCode: languageCode,
  model: model,
const audio = {
  content: fs.readFileSync(filename).toString('base64'),

const request = {
  config: config,
  audio: audio,

// Detects speech in the audio file
const [response] = await client.recognize(request);
const transcription = response.results
  .map(result => result.alternatives[0].transcript)
console.log('Transcription: ', transcription);


use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;

 * @param string $audioFile path to an audio file
 * @param string $model video
function transcribe_model_selection(string $audioFile, string $model)
    // change these variables if necessary
    $encoding = AudioEncoding::LINEAR16;
    $sampleRateHertz = 32000;
    $languageCode = 'en-US';

    // get contents of a file into a string
    $content = file_get_contents($audioFile);

    // set string as audio content
    $audio = (new RecognitionAudio())

    // set config
    $config = (new RecognitionConfig())

    // create the speech client
    $client = new SpeechClient();

    // make the API call
    $response = $client->recognize($config, $audio);
    $results = $response->getResults();

    // print results
    foreach ($results as $result) {
        $alternatives = $result->getAlternatives();
        $mostLikely = $alternatives[0];
        $transcript = $mostLikely->getTranscript();
        $confidence = $mostLikely->getConfidence();
        printf('Transcript: %s' . PHP_EOL, $transcript);
        printf('Confidence: %s' . PHP_EOL, $confidence);



def transcribe_model_selection(speech_file, model):
    """Transcribe the given audio file synchronously with
    the selected model."""
    from import speech

    client = speech.SpeechClient()

    with open(speech_file, "rb") as audio_file:
        content =

    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print("Transcript: {}".format(alternative.transcript))


# file_path = "path/to/audio.wav"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech

config = {
  encoding:          :LINEAR16,
  sample_rate_hertz: 16_000,
  language_code:     "en-US",
  model:             model

file  = File.binread file_path
audio = { content: file }

operation = speech.long_running_recognize config: config, audio: audio

puts "Operation started"


raise operation.results.message if operation.error?

results = operation.response.results

results.each_with_index do |result, i|
  alternative = result.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{i}"
  puts "Transcript: #{alternative.transcript}"

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.