Synchronously transcribe a local file

Perform synchronous transcription on a local audio file.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code sample

Go


func recognize(w io.Writer, file string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return err
	}
	defer client.Close()

	data, err := ioutil.ReadFile(file)
	if err != nil {
		return err
	}

	// Send the contents of the audio file with the encoding and
	// and sample rate information to be transcripted.
	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
		},
	})

	// Print the results.
	for _, result := range resp.Results {
		for _, alt := range result.Alternatives {
			fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
		}
	}
	return nil
}

Java

/**
 * Performs speech recognition on raw PCM audio and prints the transcription.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 */
public static void syncRecognizeFile(String fileName) throws Exception {
  try (SpeechClient speech = SpeechClient.create()) {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    ByteString audioBytes = ByteString.copyFrom(data);

    // Configure request with local raw PCM audio
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();

    // Use blocking call to get audio transcript
    RecognizeResponse response = speech.recognize(config, audio);
    List<SpeechRecognitionResult> results = response.getResultsList();

    for (SpeechRecognitionResult result : results) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcription: %s%n", alternative.getTranscript());
    }
  }
}

Node.js

// Provides "hints" to the speech recognizer to favor
// specific classes of words in the results.

// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

async function transcribeContextClasses() {
  // storageUri = 'gs://YOUR_BUCKET_ID/path/to/your/file.wav'
  const audio = {
    uri: storageUri,
  };

  // SpeechContext: to configure your speech_context see:
  // https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#speechcontext
  // Full list of supported phrases(class tokens) here:
  // https://cloud.google.com/speech-to-text/docs/class-tokens
  const speechContext = {
    phrases: ['$TIME'],
  };

  // RecognitionConfig: to configure your encoding and sample_rate_hertz, see:
  // https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#recognitionconfig
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    speechContexts: [speechContext],
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file.
  const [response] = await client.recognize(request);
  response.results.forEach((result, index) => {
    const transcript = result.alternatives[0].transcript;
    console.log('-'.repeat(20));
    console.log(`First alternative of result ${index}`);
    console.log(`Transcript: ${transcript}`);
  });
}

transcribeContextClasses();

PHP

use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;

/** Uncomment and populate these variables in your code */
// $audioFile = 'path to an audio file';

// change these variables if necessary
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 32000;
$languageCode = 'en-US';

// get contents of a file into a string
$content = file_get_contents($audioFile);

// set string as audio content
$audio = (new RecognitionAudio())
    ->setContent($content);

// set config
$config = (new RecognitionConfig())
    ->setEncoding($encoding)
    ->setSampleRateHertz($sampleRateHertz)
    ->setLanguageCode($languageCode);

// create the speech client
$client = new SpeechClient();

try {
    $response = $client->recognize($config, $audio);
    foreach ($response->getResults() as $result) {
        $alternatives = $result->getAlternatives();
        $mostLikely = $alternatives[0];
        $transcript = $mostLikely->getTranscript();
        $confidence = $mostLikely->getConfidence();
        printf('Transcript: %s' . PHP_EOL, $transcript);
        printf('Confidence: %s' . PHP_EOL, $confidence);
    }
} finally {
    $client->close();
}

Python

def transcribe_file(speech_file):
    """Transcribe the given audio file."""
    from google.cloud import speech
    import io

    client = speech.SpeechClient()

    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    response = client.recognize(config=config, audio=audio)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u"Transcript: {}".format(result.alternatives[0].transcript))

Ruby

# audio_file_path = "Path to file on which to perform speech recognition"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech

audio_file = File.binread audio_file_path
config     = { encoding:          :LINEAR16,
               sample_rate_hertz: 16_000,
               language_code:     "en-US" }
audio      = { content: audio_file }

response = speech.recognize config: config, audio: audio

results = response.results

alternatives = results.first.alternatives
alternatives.each do |alternative|
  puts "Transcription: #{alternative.transcript}"
end

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser