テキストを音声プロファイルで合成する

さまざまなタイプのハードウェアで再生する合成音声を最適化するための、音声プロファイルを指定するテキストを合成します。

このコードサンプルが含まれるドキュメント ページ

コンテキストで使用されているコードサンプルを見るには、次のドキュメントをご覧ください。

コードサンプル

C#

/// <summary>
/// Creates an audio file from the text input, applying the specifed
/// device profile to the output.
/// </summary>
/// <param name="text">Text to synthesize into audio</param>
/// <param name="outputFile">Name of audio output file</param>
/// <param name="effectProfileId">Audio effect profile to apply</param>
/// <remarks>
/// Output file saved in project folder.
/// </remarks>
public static int SynthesizeTextWithAudioProfile(string text,
                                                 string outputFile,
                                                 string effectProfileId)
{
    var client = TextToSpeechClient.Create();
    var response = client.SynthesizeSpeech(new SynthesizeSpeechRequest
    {
        Input = new SynthesisInput
        {
            Text = text
        },
        // Note: voices can also be specified by name
        // Names of voices can be retrieved with client.ListVoices().
        Voice = new VoiceSelectionParams
        {
            LanguageCode = "en-US",
            SsmlGender = SsmlVoiceGender.Female
        },
        AudioConfig = new AudioConfig
        {
            AudioEncoding = AudioEncoding.Mp3,
            // Note: you can pass in multiple audio effects profiles.
            // They are applied in the same order as provided.
            EffectsProfileId = { effectProfileId }
        }
    });

    // The response's AudioContent is binary.
    using (Stream output = File.Create(outputFile))
    {
        response.AudioContent.WriteTo(output);
    }

    return 0;
}

Java

/**
 * Demonstrates using the Text to Speech client with audio profiles to synthesize text or ssml
 *
 * @param text the raw text to be synthesized. (e.g., "Hello there!")
 * @param effectsProfile audio profile to be used for synthesis. (e.g.,
 *     "telephony-class-application")
 * @throws Exception on TextToSpeechClient Errors.
 */
public static ByteString synthesizeTextWithAudioProfile(String text, String effectsProfile)
    throws Exception {
  // Instantiates a client
  try (TextToSpeechClient textToSpeechClient = TextToSpeechClient.create()) {
    // Set the text input to be synthesized
    SynthesisInput input = SynthesisInput.newBuilder().setText(text).build();

    // Build the voice request
    VoiceSelectionParams voice =
        VoiceSelectionParams.newBuilder()
            .setLanguageCode("en-US") // languageCode = "en_us"
            .setSsmlGender(SsmlVoiceGender.FEMALE) // ssmlVoiceGender = SsmlVoiceGender.FEMALE
            .build();

    // Select the type of audio file you want returned and the audio profile
    AudioConfig audioConfig =
        AudioConfig.newBuilder()
            .setAudioEncoding(AudioEncoding.MP3) // MP3 audio.
            .addEffectsProfileId(effectsProfile) // audio profile
            .build();

    // Perform the text-to-speech request
    SynthesizeSpeechResponse response =
        textToSpeechClient.synthesizeSpeech(input, voice, audioConfig);

    // Get the audio contents from the response
    ByteString audioContents = response.getAudioContent();

    // Write the response to the output file.
    try (OutputStream out = new FileOutputStream("output.mp3")) {
      out.write(audioContents.toByteArray());
      System.out.println("Audio content written to file \"output.mp3\"");
      return audioContents;
    }
  }
}

Node.js


/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const text = 'Text you want to vocalize';
// const outputFile = 'YOUR_OUTPUT_FILE_LOCAtION;
// const languageCode = 'LANGUAGE_CODE_FOR_OUTPUT';
// const ssmlGender = 'SSML_GENDER_OF_SPEAKER';

// Imports the Google Cloud client library
const speech = require('@google-cloud/text-to-speech');
const fs = require('fs');
const util = require('util');

// Creates a client
const client = new speech.TextToSpeechClient();

async function synthesizeWithEffectsProfile() {
  // Add one or more effects profiles to array.
  // Refer to documentation for more details:
  // https://cloud.google.com/text-to-speech/docs/audio-profiles
  const effectsProfileId = ['telephony-class-application'];

  const request = {
    input: {text: text},
    voice: {languageCode: languageCode, ssmlGender: ssmlGender},
    audioConfig: {audioEncoding: 'MP3', effectsProfileId: effectsProfileId},
  };

  const [response] = await client.synthesizeSpeech(request);
  const writeFile = util.promisify(fs.writeFile);
  await writeFile(outputFile, response.audioContent, 'binary');
  console.log(`Audio content written to file: ${outputFile}`);
}

PHP

use Google\Cloud\TextToSpeech\V1\AudioConfig;
use Google\Cloud\TextToSpeech\V1\AudioEncoding;
use Google\Cloud\TextToSpeech\V1\SsmlVoiceGender;
use Google\Cloud\TextToSpeech\V1\SynthesisInput;
use Google\Cloud\TextToSpeech\V1\TextToSpeechClient;
use Google\Cloud\TextToSpeech\V1\VoiceSelectionParams;

/** Uncomment and populate these variables in your code */
// $text = 'Text to synthesize';
// $effectsProfileId = 'Audio Profile ID';

// create client object
$client = new TextToSpeechClient();

$inputText = (new SynthesisInput())
    ->setText($text);

// note: the voice can also be specified by name
// names of voices can be retrieved with $client->listVoices()
$voice = (new VoiceSelectionParams())
    ->setLanguageCode('en-US')
    ->setSsmlGender(SsmlVoiceGender::FEMALE);

// define effects profile id.
$audioConfig = (new AudioConfig())
    ->setAudioEncoding(AudioEncoding::MP3)
    ->setEffectsProfileId(array($effectsProfileId));

$response = $client->synthesizeSpeech($inputText, $voice, $audioConfig);
$audioContent = $response->getAudioContent();

file_put_contents('output.mp3', $audioContent);
print('Audio content written to "output.mp3"' . PHP_EOL);

$client->close();

Python

def synthesize_text_with_audio_profile(text, output, effects_profile_id):
    """Synthesizes speech from the input string of text."""
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(language_code="en-US")

    # Note: you can pass in multiple effects_profile_id. They will be applied
    # in the same order they are provided.
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        effects_profile_id=[effects_profile_id],
    )

    response = client.synthesize_speech(
        input=input_text, voice=voice, audio_config=audio_config
    )

    # The response's audio_content is binary.
    with open(output, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "%s"' % output)