Getting punctuation

This page describes how to get punctuation like periods and commas in transcription results from Speech-to-Text.

By default, Cloud Speech-to-Text does not include punctuation marks in the results from speech recognition. You can, however, request that Cloud Speech-to-Text detect and insert punctuation in transcription results. Cloud Speech-to-Text can recognize commas, question marks, and periods in transcription requests.

To enable automatic punctuation, set the enableAutomaticPunctuation field to true in the parameters for the request. The Speech-to-Text API supports automatic punctuation for all speech recognition methods: speech:recognize, speech:longrunningrecognize, and StreamingRecognize.

The following code samples demonstrate how to get punctuation details in a transcription request.


Refer to the speech:recognize API endpoint for complete details.

To perform synchronous speech recognition, make a POST request and provide the appropriate request body. The following shows an example of a POST request using curl. The example uses the access token for a service account set up for the project using the Google Cloud SDK. For instructions on installing the Cloud SDK, setting up a project with a service account, and obtaining an access token, see the quickstart.

curl -s -H "Content-Type: application/json" \
    -H "Authorization: Bearer "$(gcloud auth print-access-token) \ \
    --data '{
  "config": {
    "sampleRateHertz": 16000,
    "languageCode": "en-US",
    "enableAutomaticPunctuation": true
  "audio": {

See the RecognitionConfig reference documentation for more information on configuring the request body.

If the request is successful, the server returns a 200 OK HTTP status code and the response in JSON format:

  "results": [
      "alternatives": [
          "transcript": "How old is the Brooklyn Bridge?",
          "confidence": 0.98360395


func autoPunctuation(w io.Writer, path string) error {
	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)

	// path = "../testdata/commercial_mono.wav"
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return fmt.Errorf("ReadFile: %v", err)

	resp, err := client.Recognize(ctx, &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 8000,
			LanguageCode:    "en-US",
			// Enable automatic punctuation.
			EnableAutomaticPunctuation: true,
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: data},
	if err != nil {
		return fmt.Errorf("Recognize: %v", err)

	for i, result := range resp.Results {
		fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
		fmt.Fprintf(w, "Result %d\n", i+1)
		for j, alternative := range result.Alternatives {
			fmt.Fprintf(w, "Alternative %d: %s\n", j+1, alternative.Transcript)
	return nil


 * Performs transcription on remote FLAC file and prints the transcription.
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request with raw PCM audio
    RecognitionConfig config =

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    while (!response.isDone()) {
      System.out.println("Waiting for response...");

    // Just print the first result here.
    SpeechRecognitionResult result = response.get().getResultsList().get(0);

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());


// Imports the Google Cloud client library for API
 * TODO(developer): Update client library import to use new
 * version of API when desired features become available
const speech = require('@google-cloud/speech');
const fs = require('fs');

// Creates a client
const client = new speech.SpeechClient();

 * TODO(developer): Uncomment the following lines before running the sample.
// const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
// const encoding = 'Encoding of the audio file, e.g. LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'BCP-47 language code, e.g. en-US';

const config = {
  encoding: encoding,
  languageCode: languageCode,
  enableAutomaticPunctuation: true,
const audio = {
  content: fs.readFileSync(filename).toString('base64'),

const request = {
  config: config,
  audio: audio,

// Detects speech in the audio file
const [response] = await client.recognize(request);
const transcription = response.results
  .map(result => result.alternatives[0].transcript)
console.log(`Transcription: `, transcription);


from import speech
client = speech.SpeechClient()

# path = 'resources/commercial_mono.wav'
with, 'rb') as audio_file:
    content =

audio = speech.types.RecognitionAudio(content=content)
config = speech.types.RecognitionConfig(
    # Enable automatic punctuation

response = client.recognize(config, audio)

for i, result in enumerate(response.results):
    alternative = result.alternatives[0]
    print('-' * 20)
    print('First alternative of result {}'.format(i))
    print('Transcript: {}'.format(alternative.transcript))


static object SyncRecognizePunctuation(string filePath)
    var speech = SpeechClient.Create();
    var response = speech.Recognize(new RecognitionConfig()
        Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
        SampleRateHertz = 8000,
        LanguageCode = "en",
        EnableAutomaticPunctuation = true,
    }, RecognitionAudio.FromFile(filePath));
    foreach (var result in response.Results)
        foreach (var alternative in result.Alternatives)
    return 0;


use Google\Cloud\Speech\V1\SpeechClient;
use Google\Cloud\Speech\V1\RecognitionAudio;
use Google\Cloud\Speech\V1\RecognitionConfig;
use Google\Cloud\Speech\V1\RecognitionConfig\AudioEncoding;

/** Uncomment and populate these variables in your code */
// $audioFile = 'path to an audio file';

// change these variables if necessary
$encoding = AudioEncoding::LINEAR16;
$sampleRateHertz = 32000;
$languageCode = 'en-US';

// get contents of a file into a string
$content = file_get_contents($audioFile);

// set string as audio content
$audio = (new RecognitionAudio())

// set config
$config = (new RecognitionConfig())

// create the speech client
$client = new SpeechClient();

// make the API call
$response = $client->recognize($config, $audio);
$results = $response->getResults();

// print results
foreach ($results as $result) {
    $alternatives = $result->getAlternatives();
    $mostLikely = $alternatives[0];
    $transcript = $mostLikely->getTranscript();
    $confidence = $mostLikely->getConfidence();
    printf('Transcript: %s' . PHP_EOL, $transcript);
    printf('Confidence: %s' . PHP_EOL, $confidence);



# audio_file_path = "path/to/audio.wav"

require "google/cloud/speech"

speech =

config = {
  encoding:                     :LINEAR16,
  sample_rate_hertz:            8000,
  language_code:                "en-US",
  enable_automatic_punctuation: true

audio_file = File.binread audio_file_path
audio      = { content: audio_file }

operation = speech.long_running_recognize config, audio

puts "Operation started"


raise operation.results.message if operation.error?

results = operation.response.results

results.each_with_index do |result, i|
  alternative = result.alternatives.first
  puts "-" * 20
  puts "First alternative of result #{i}"
  puts "Transcript: #{alternative.transcript}"

What's next

Trang này có hữu ích không? Hãy cho chúng tôi biết đánh giá của bạn:

Gửi phản hồi về...

Cloud Speech-to-Text Documentation
Bạn cần trợ giúp? Truy cập trang hỗ trợ của chúng tôi.