Asynchronously transcribe a local audio file

Stay organized with collections Save and categorize content based on your preferences.

Perform asynchronous transcription on a local audio file.

Code sample


func send(w io.Writer, client *speech.Client, filename string) error {
	ctx := context.Background()
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return err

	// Send the contents of the audio file with the encoding and
	// and sample rate information to be transcripted.
	req := &speechpb.LongRunningRecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 16000,
			LanguageCode:    "en-US",
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},

	op, err := client.LongRunningRecognize(ctx, req)
	if err != nil {
		return err
	resp, err := op.Wait(ctx)
	if err != nil {
		return err

	// Print the results.
	for _, result := range resp.Results {
		for _, alt := range result.Alternatives {
			fmt.Fprintf(w, "\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
	return nil


 * Performs non-blocking speech recognition on raw PCM audio and prints the transcription. Note
 * that transcription is limited to 60 seconds audio.
 * @param fileName the path to a PCM audio file to transcribe.
public static void asyncRecognizeFile(String fileName) throws Exception {
  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {

    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    ByteString audioBytes = ByteString.copyFrom(data);

    // Configure request with local raw PCM audio
    RecognitionConfig config =
    RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speech.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");

    List<SpeechRecognitionResult> results = response.get().getResultsList();

    for (SpeechRecognitionResult result : results) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcription: %s%n", alternative.getTranscript());


// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
const fs = require('fs');

// Creates a client
const client = new speech.SpeechClient();

 * TODO(developer): Uncomment the following lines before running the sample.
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

const config = {
  encoding: encoding,
  sampleRateHertz: sampleRateHertz,
  languageCode: languageCode,

 * Note that transcription is limited to 60 seconds audio.
 * Use a GCS file for audio longer than 1 minute.
const audio = {
  content: fs.readFileSync(filename).toString('base64'),

const request = {
  config: config,
  audio: audio,

// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
const [operation] = await client.longRunningRecognize(request);

// Get a Promise representation of the final result of the job
const [response] = await operation.promise();
const transcription = response.results
  .map(result => result.alternatives[0].transcript)
console.log(`Transcription: ${transcription}`);


use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;

 * @param string $audioFile path to an audio file
function transcribe_async(string $audioFile)
    // change these variables if necessary
    $encoding = AudioEncoding::LINEAR16;
    $sampleRateHertz = 32000;
    $languageCode = 'en-US';

    // get contents of a file into a string
    $content = file_get_contents($audioFile);

    // set string as audio content
    $audio = (new RecognitionAudio())

    // set config
    $config = (new RecognitionConfig())

    // create the speech client
    $client = new SpeechClient();

    // create the asyncronous recognize operation
    $operation = $client->longRunningRecognize($config, $audio);

    if ($operation->operationSucceeded()) {
        $response = $operation->getResult();

        // each result is for a consecutive portion of the audio. iterate
        // through them to get the transcripts for the entire audio file.
        foreach ($response->getResults() as $result) {
            $alternatives = $result->getAlternatives();
            $mostLikely = $alternatives[0];
            $transcript = $mostLikely->getTranscript();
            $confidence = $mostLikely->getConfidence();
            printf('Transcript: %s' . PHP_EOL, $transcript);
            printf('Confidence: %s' . PHP_EOL, $confidence);
    } else {



def transcribe_file(speech_file):
    """Transcribe the given audio file asynchronously."""
    from import speech

    client = speech.SpeechClient()

    with open(speech_file, "rb") as audio_file:
        content =

     Note that transcription is limited to a 60 seconds audio file.
     Use a GCS file for audio longer than 1 minute.
    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(

    operation = client.long_running_recognize(config=config, audio=audio)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print("Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))


# audio_file_path = "Path to file on which to perform speech recognition"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech

audio_file = File.binread audio_file_path
config     = { encoding:          :LINEAR16,
               sample_rate_hertz: 16_000,
               language_code:     "en-US" }
audio      = { content: audio_file }

operation = speech.long_running_recognize config: config, audio: audio

puts "Operation started"


raise operation.results.message if operation.error?

results = operation.response.results

alternatives = results.first.alternatives
alternatives.each do |alternative|
  puts "Transcription: #{alternative.transcript}"

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.