Transcription of a file in Cloud Storage with diarization

Stay organized with collections Save and categorize content based on your preferences.

Recognize multiple speakers in an audio file stored in Cloud Storage.

Code sample

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.WordInfo;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

public class TranscribeDiarizationGcs {

  static void transcribeDiarizationGcs()
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String gcsUri = "gs://cloud-samples-data/speech/commercial_mono.wav";
    transcribeDiarizationGcs(gcsUri);
  }

  // Transcribe the give gcs file using speaker diarization
  public static void transcribeDiarizationGcs(String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (SpeechClient speechClient = SpeechClient.create()) {
      SpeakerDiarizationConfig speakerDiarizationConfig =
          SpeakerDiarizationConfig.newBuilder()
              .setEnableSpeakerDiarization(true)
              .setMinSpeakerCount(2)
              .setMaxSpeakerCount(2)
              .build();
      // Configure request to enable Speaker diarization
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
              .setLanguageCode("en-US")
              .setSampleRateHertz(8000)
              .setDiarizationConfig(speakerDiarizationConfig)
              .build();
      // Set the remote path for the audio file
      RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

      // Use non-blocking call for getting file transcription
      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
          speechClient.longRunningRecognizeAsync(config, audio);
      System.out.println("Waiting for response...");

      // Speaker Tags are only included in the last result object, which has only one alternative.
      LongRunningRecognizeResponse response = future.get();
      SpeechRecognitionAlternative alternative =
          response.getResults(response.getResultsCount() - 1).getAlternatives(0);
      // The alternative is made up of WordInfo objects that contain the speaker_tag.
      WordInfo wordInfo = alternative.getWords(0);
      int currentSpeakerTag = wordInfo.getSpeakerTag();
      // For each word, get all the words associated with one speaker, once the speaker changes,
      // add a new line with the new speaker and their spoken words.
      StringBuilder speakerWords =
          new StringBuilder(
              String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
      for (int i = 1; i < alternative.getWordsCount(); i++) {
        wordInfo = alternative.getWords(i);
        if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
          speakerWords.append(" ");
          speakerWords.append(wordInfo.getWord());
        } else {
          speakerWords.append(
              String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
          currentSpeakerTag = wordInfo.getSpeakerTag();
        }
      }
      System.out.println(speakerWords.toString());
    }
  }
}

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.