Translate text in batches (Advanced edition only)

Translate a large volume of text.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code sample

C#

Before trying this sample, follow the C# setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation C# API reference documentation.


using System;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Translate.V3;

namespace GoogleCloudSamples
{
    public static class BatchTranslateText
    {
        /// <summary>
        /// Translates a batch of texts on GCS and stores the result in a GCS location.
        /// </summary>
        /// <param name="projectId">Your Google Cloud Project ID.</param>
        /// <param name="location">Region.</param>
        /// <param name="inputUri">The GCS path where input configuration is stored.</param>
        /// <param name="outputUri">The GCS path for translation output.</param>
        /// <param name="sourceLanguage">Source language code.</param>
        /// <param name="targetLanguage">Target language code.</param>
        public static void BatchTranslateTextSample(string inputUri = "gs://cloud-samples-data/text.txt",
            string outputUri = "gs://YOUR_BUCKET_ID/path_to_store_results/",
            string projectId = "[Google Cloud Project ID]",
            string location = "us-central1",
            string sourceLanguage = "en",
            string targetLanguage = "ja")
        {
            TranslationServiceClient translationServiceClient = TranslationServiceClient.Create();
            BatchTranslateTextRequest request = new BatchTranslateTextRequest
            {
                ParentAsLocationName = new LocationName(projectId, location),
                SourceLanguageCode = sourceLanguage,
                TargetLanguageCodes =
                {
                    targetLanguage,
                },
                InputConfigs =
                {
                    new InputConfig
                    {
                        GcsSource = new GcsSource
                        {
                            InputUri = inputUri,
                        },
                        MimeType = "text/plain",
                    },
                },
                OutputConfig = new OutputConfig
                {
                    GcsDestination = new GcsDestination
                    {
                        OutputUriPrefix = outputUri,
                    },
                },
            };
            // Poll until the returned long-running operation is completed.
            BatchTranslateResponse response = translationServiceClient.BatchTranslateText(request).PollUntilCompleted().Result;
            Console.WriteLine($"Total Characters: {response.TotalCharacters}");
            Console.WriteLine($"Translated Characters: {response.TranslatedCharacters}");
        }
    }

Go

Before trying this sample, follow the Go setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Go API reference documentation.

import (
	"context"
	"fmt"
	"io"

	translate "cloud.google.com/go/translate/apiv3"
	translatepb "google.golang.org/genproto/googleapis/cloud/translate/v3"
)

// batchTranslateText translates a large volume of text in asynchronous batch mode.
func batchTranslateText(w io.Writer, projectID string, location string, inputURI string, outputURI string, sourceLang string, targetLang string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// inputURI := "gs://cloud-samples-data/text.txt"
	// outputURI := "gs://YOUR_BUCKET_ID/path_to_store_results/"
	// sourceLang := "en"
	// targetLang := "ja"

	ctx := context.Background()
	client, err := translate.NewTranslationClient(ctx)
	if err != nil {
		return fmt.Errorf("NewTranslationClient: %v", err)
	}
	defer client.Close()

	req := &translatepb.BatchTranslateTextRequest{
		Parent:              fmt.Sprintf("projects/%s/locations/%s", projectID, location),
		SourceLanguageCode:  sourceLang,
		TargetLanguageCodes: []string{targetLang},
		InputConfigs: []*translatepb.InputConfig{
			{
				Source: &translatepb.InputConfig_GcsSource{
					GcsSource: &translatepb.GcsSource{InputUri: inputURI},
				},
				// Optional. Can be "text/plain" or "text/html".
				MimeType: "text/plain",
			},
		},
		OutputConfig: &translatepb.OutputConfig{
			Destination: &translatepb.OutputConfig_GcsDestination{
				GcsDestination: &translatepb.GcsDestination{
					OutputUriPrefix: outputURI,
				},
			},
		},
	}

	// The BatchTranslateText operation is async.
	op, err := client.BatchTranslateText(ctx, req)
	if err != nil {
		return fmt.Errorf("BatchTranslateText: %v", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %v", err)
	}

	fmt.Fprintf(w, "Total characters: %v\n", resp.GetTotalCharacters())
	fmt.Fprintf(w, "Translated characters: %v\n", resp.GetTranslatedCharacters())

	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Java API reference documentation.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.translate.v3.BatchTranslateMetadata;
import com.google.cloud.translate.v3.BatchTranslateResponse;
import com.google.cloud.translate.v3.BatchTranslateTextRequest;
import com.google.cloud.translate.v3.GcsDestination;
import com.google.cloud.translate.v3.GcsSource;
import com.google.cloud.translate.v3.InputConfig;
import com.google.cloud.translate.v3.LocationName;
import com.google.cloud.translate.v3.OutputConfig;
import com.google.cloud.translate.v3.TranslationServiceClient;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class BatchTranslateText {

  public static void batchTranslateText()
      throws InterruptedException, ExecutionException, IOException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR-PROJECT-ID";
    // Supported Languages: https://cloud.google.com/translate/docs/languages
    String sourceLanguage = "your-source-language";
    String targetLanguage = "your-target-language";
    String inputUri = "gs://your-gcs-bucket/path/to/input/file.txt";
    String outputUri = "gs://your-gcs-bucket/path/to/results/";
    batchTranslateText(projectId, sourceLanguage, targetLanguage, inputUri, outputUri);
  }

  // Batch translate text
  public static void batchTranslateText(
      String projectId,
      String sourceLanguage,
      String targetLanguage,
      String inputUri,
      String outputUri)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (TranslationServiceClient client = TranslationServiceClient.create()) {
      // Supported Locations: `us-central1`
      LocationName parent = LocationName.of(projectId, "us-central1");

      GcsSource gcsSource = GcsSource.newBuilder().setInputUri(inputUri).build();
      // Supported Mime Types: https://cloud.google.com/translate/docs/supported-formats
      InputConfig inputConfig =
          InputConfig.newBuilder().setGcsSource(gcsSource).setMimeType("text/plain").build();

      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(outputUri).build();
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      BatchTranslateTextRequest request =
          BatchTranslateTextRequest.newBuilder()
              .setParent(parent.toString())
              .setSourceLanguageCode(sourceLanguage)
              .addTargetLanguageCodes(targetLanguage)
              .addInputConfigs(inputConfig)
              .setOutputConfig(outputConfig)
              .build();

      OperationFuture<BatchTranslateResponse, BatchTranslateMetadata> future =
          client.batchTranslateTextAsync(request);

      System.out.println("Waiting for operation to complete...");

      // random number between 300 - 450 (maximum allowed seconds)
      long randomNumber = ThreadLocalRandom.current().nextInt(300, 450);
      BatchTranslateResponse response = future.get(randomNumber, TimeUnit.SECONDS);

      System.out.printf("Total Characters: %s\n", response.getTotalCharacters());
      System.out.printf("Translated Characters: %s\n", response.getTranslatedCharacters());
    }
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Node.js API reference documentation.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const inputUri = 'gs://cloud-samples-data/text.txt';
// const outputUri = 'gs://YOUR_BUCKET_ID/path_to_store_results/';

// Imports the Google Cloud Translation library
const {TranslationServiceClient} = require('@google-cloud/translate');

// Instantiates a client
const translationClient = new TranslationServiceClient();
async function batchTranslateText() {
  // Construct request
  const request = {
    parent: `projects/${projectId}/locations/${location}`,
    sourceLanguageCode: 'en',
    targetLanguageCodes: ['ja'],
    inputConfigs: [
      {
        mimeType: 'text/plain', // mime types: text/plain, text/html
        gcsSource: {
          inputUri: inputUri,
        },
      },
    ],
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: outputUri,
      },
    },
  };

  try {
    const options = {timeout: 180000};
    // Batch translate text using a long-running operation
    const [operation] = await translationClient.batchTranslateText(
      request,
      options
    );

    // Wait for operation to complete.
    const [response] = await operation.promise();

    console.log(`Total Characters: ${response.totalCharacters}`);
    console.log(`Translated Characters: ${response.translatedCharacters}`);
  } catch (error) {
    console.error(error.details);
  }
}

batchTranslateText();

PHP

Before trying this sample, follow the PHP setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation PHP API reference documentation.

use Google\Cloud\Translate\V3\GcsDestination;
use Google\Cloud\Translate\V3\GcsSource;
use Google\Cloud\Translate\V3\InputConfig;
use Google\Cloud\Translate\V3\OutputConfig;
use Google\Cloud\Translate\V3\TranslationServiceClient;

$translationServiceClient = new TranslationServiceClient();

/** Uncomment and populate these variables in your code */
// $inputUri = 'gs://cloud-samples-data/text.txt';
// $outputUri = 'gs://YOUR_BUCKET_ID/path_to_store_results/';
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $sourceLang = 'en';
// $targetLang = 'ja';
$targetLanguageCodes = [$targetLang];
$gcsSource = (new GcsSource())
    ->setInputUri($inputUri);

// Optional. Can be "text/plain" or "text/html".
$mimeType = 'text/plain';
$inputConfigsElement = (new InputConfig())
    ->setGcsSource($gcsSource)
    ->setMimeType($mimeType);
$inputConfigs = [$inputConfigsElement];
$gcsDestination = (new GcsDestination())
    ->setOutputUriPrefix($outputUri);
$outputConfig = (new OutputConfig())
    ->setGcsDestination($gcsDestination);
$formattedParent = $translationServiceClient->locationName($projectId, $location);

try {
    $operationResponse = $translationServiceClient->batchTranslateText(
        $formattedParent,
        $sourceLang,
        $targetLanguageCodes,
        $inputConfigs,
        $outputConfig
    );
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $response = $operationResponse->getResult();
        printf('Total Characters: %s' . PHP_EOL, $response->getTotalCharacters());
        printf('Translated Characters: %s' . PHP_EOL, $response->getTranslatedCharacters());
    } else {
        $error = $operationResponse->getError();
        print($error->getMessage());
    }
} finally {
    $translationServiceClient->close();
}

Python

Before trying this sample, follow the Python setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Python API reference documentation.

from google.cloud import translate


def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    timeout=180,
):
    """Translates a batch of texts on GCS and stores the result in a GCS location."""

    client = translate.TranslationServiceClient()

    location = "us-central1"
    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}
    parent = f"projects/{project_id}/locations/{location}"

    # Supported language codes: https://cloud.google.com/translate/docs/language
    operation = client.batch_translate_text(
        request={
            "parent": parent,
            "source_language_code": "en",
            "target_language_codes": ["ja"],  # Up to 10 language codes here.
            "input_configs": [input_configs_element],
            "output_config": output_config,
        }
    )

    print("Waiting for operation to complete...")
    response = operation.result(timeout)

    print("Total Characters: {}".format(response.total_characters))
    print("Translated Characters: {}".format(response.translated_characters))

Ruby

Before trying this sample, follow the Ruby setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Ruby API reference documentation.

require "google/cloud/translate"

# input_uri = "gs://cloud-samples-data/text.txt"
# output_uri = "gs://YOUR_BUCKET_ID/path_to_store_results/"
# project_id = "[Google Cloud Project ID]"
# location_id = "[LOCATION ID]"

source_lang = "en"
target_lang = "ja"
# Optional. Can be "text/plain" or "text/html".
mime_type = "text/plain"

client = Google::Cloud::Translate.translation_service

parent = client.location_path project: project_id, location: location_id
input_config = {
  mime_type:  mime_type,
  gcs_source: { input_uri: input_uri }
}
output_config = {
  gcs_destination: { output_uri_prefix: output_uri }
}

operation = client.batch_translate_text(
  parent:                parent,
  source_language_code:  source_lang,
  target_language_codes: [target_lang],
  input_configs:         [input_config],
  output_config:         output_config
)

# Wait until the long running operation is done
operation.wait_until_done!

response = operation.response

puts "Total Characters: #{response.total_characters}"
puts "Translated Characters: #{response.translated_characters}"