Identifica i diversi altoparlanti nel campione audio.
Esempio di codice
Java
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.WordInfo;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
class TranscribeDiarization {
static void transcribeDiarization() throws IOException {
// TODO(developer): Replace these variables before running the sample.
String fileName = "resources/commercial_mono.wav";
transcribeDiarization(fileName);
}
// Transcribe the given audio file using speaker diarization.
static void transcribeDiarization(String fileName) throws IOException {
Path path = Paths.get(fileName);
byte[] content = Files.readAllBytes(path);
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (SpeechClient client = SpeechClient.create()) {
// Get the contents of the local audio file
RecognitionAudio recognitionAudio =
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
SpeakerDiarizationConfig speakerDiarizationConfig =
SpeakerDiarizationConfig.newBuilder()
.setEnableSpeakerDiarization(true)
.setMinSpeakerCount(2)
.setMaxSpeakerCount(2)
.build();
// Configure request to enable Speaker diarization
RecognitionConfig config =
RecognitionConfig.newBuilder()
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(8000)
.setDiarizationConfig(speakerDiarizationConfig)
.build();
// Perform the transcription request
RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);
// Speaker Tags are only included in the last result object, which has only one alternative.
SpeechRecognitionAlternative alternative =
recognizeResponse.getResults(recognizeResponse.getResultsCount() - 1).getAlternatives(0);
// The alternative is made up of WordInfo objects that contain the speaker_tag.
WordInfo wordInfo = alternative.getWords(0);
int currentSpeakerTag = wordInfo.getSpeakerTag();
// For each word, get all the words associated with one speaker, once the speaker changes,
// add a new line with the new speaker and their spoken words.
StringBuilder speakerWords =
new StringBuilder(
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
for (int i = 1; i < alternative.getWordsCount(); i++) {
wordInfo = alternative.getWords(i);
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
speakerWords.append(" ");
speakerWords.append(wordInfo.getWord());
} else {
speakerWords.append(
String.format("\nSpeaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
currentSpeakerTag = wordInfo.getSpeakerTag();
}
}
System.out.println(speakerWords.toString());
}
}
}
Node.js
const fs = require('fs');
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
// Creates a client
const client = new speech.SpeechClient();
// Set config for Diarization
const diarizationConfig = {
enableSpeakerDiarization: true,
maxSpeakerCount: 2,
};
const config = {
encoding: 'LINEAR16',
sampleRateHertz: 8000,
languageCode: 'en-US',
diarizationConfig: diarizationConfig,
model: 'phone_call',
};
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
const audio = {
content: fs.readFileSync(fileName).toString('base64'),
};
const request = {
config: config,
audio: audio,
};
const [response] = await client.recognize(request);
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log(`Transcription: ${transcription}`);
console.log('Speaker Diarization:');
const result = response.results[response.results.length - 1];
const wordsInfo = result.alternatives[0].words;
// Note: The transcript within each result is separate and sequential per result.
// However, the words list within an alternative includes all the words
// from all the results thus far. Thus, to get all the words with speaker
// tags, you only have to take the words list from the last result:
wordsInfo.forEach(a =>
console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
);
Passaggi successivi
Per cercare e filtrare esempi di codice per altri prodotti Google Cloud, consulta la pagina Browser di esempio Google Cloud.