Transcribir un archivo multicanal local

Transcribe un archivo de audio local que incluya más de un canal.

Explora más

Para obtener documentación en la que se incluye esta muestra de código, consulta lo siguiente:

Muestra de código

Java

/**
 * Transcribe a local audio file with multi-channel recognition
 *
 * @param fileName the path to local audio file
 */
public static void transcribeMultiChannel(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speechClient = SpeechClient.create()) {
    // Get the contents of the local audio file
    RecognitionAudio recognitionAudio =
        RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();

    // Configure request to enable multiple channels
    RecognitionConfig config =
        RecognitionConfig.newBuilder()
            .setEncoding(AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(44100)
            .setAudioChannelCount(2)
            .setEnableSeparateRecognitionPerChannel(true)
            .build();

    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

    // Print out the results
    for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternatives(0);
      System.out.format("Transcript : %s\n", alternative.getTranscript());
      System.out.printf("Channel Tag : %s\n", result.getChannelTag());
    }
  }
}

Node.js

const fs = require('fs');

// Imports the Google Cloud client library
const speech = require('@google-cloud/speech').v1;

// Creates a client
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

const config = {
  encoding: 'LINEAR16',
  languageCode: 'en-US',
  audioChannelCount: 2,
  enableSeparateRecognitionPerChannel: true,
};

const audio = {
  content: fs.readFileSync(fileName).toString('base64'),
};

const request = {
  config: config,
  audio: audio,
};

const [response] = await client.recognize(request);
const transcription = response.results
  .map(
    result =>
      ` Channel Tag: ${result.channelTag} ${result.alternatives[0].transcript}`
  )
  .join('\n');
console.log(`Transcription: \n${transcription}`);

Python

from google.cloud import speech

client = speech.SpeechClient()

with open(speech_file, "rb") as audio_file:
    content = audio_file.read()

audio = speech.RecognitionAudio(content=content)

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=44100,
    language_code="en-US",
    audio_channel_count=2,
    enable_separate_recognition_per_channel=True,
)

response = client.recognize(config=config, audio=audio)

for i, result in enumerate(response.results):
    alternative = result.alternatives[0]
    print("-" * 20)
    print("First alternative of result {}".format(i))
    print(u"Transcript: {}".format(alternative.transcript))
    print(u"Channel Tag: {}".format(result.channel_tag))

Ruby

# audio_file_path = "path/to/audio.wav"

require "google/cloud/speech"

speech = Google::Cloud::Speech.speech

config = {
  encoding:                                :LINEAR16,
  sample_rate_hertz:                       44_100,
  language_code:                           "en-US",
  audio_channel_count:                     2,
  enable_separate_recognition_per_channel: true
}

audio_file = File.binread audio_file_path
audio      = { content: audio_file }

response = speech.recognize config: config, audio: audio

results = response.results

results.each_with_index do |result, i|
  alternative = result.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{i}"
  puts "Transcript: #{alternative.transcript}"
  puts "Channel Tag: #{result.channel_tag}"
end

¿Qué sigue?

Para buscar y filtrar muestras de código para otros productos de Google Cloud, consulta el navegador de muestra de Google Cloud.