Translate text with model in batches (Advanced edition only)

Translate a large volume of text with model.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code Sample

C#

Before trying this sample, follow the C# setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation C# API reference documentation.


using Google.Api.Gax.ResourceNames;
using Google.Cloud.Translate.V3;
using System;

namespace GoogleCloudSamples
{
    public static class BatchTranslateTextWithModel
    {
        /// <summary>
        /// Batch translate text using Translation model.
        /// Model can be AutoML or General[built-in] model.
        /// </summary>
        /// <param name="targetLanguage">Target language code.</param>
        /// <param name="sourceLanguage">Required. Source language code.</param>
        /// <param name="modelId">A model is used for Translation.
        /// <param name="projectId">Your Google Cloud Project ID.</param>
        /// <param name="inputUri">The GCS path where input configuration is stored.</param>
        /// <param name="outputUri">The GCS path for translation output.</param>
        /// <param name="location"> Region.</param>
        public static void BatchTranslateTextWithModelSample(
            string inputUri = "gs://cloud-samples-data/translation/custom_model_text.txt'",
            string outputUri = "gs://YOUR_BUCKET_ID/path_to_store_results/",
            string projectId = "[Google Cloud Project ID]",
            string location = "us-central1",
            string targetLanguage = "ja",
            string sourceLanguage = "en",
            string modelId = "[YOUR-MODEL-ID]")
        {
            TranslationServiceClient translationServiceClient = TranslationServiceClient.Create();
            string modelPath = $"projects/{projectId}/locations/{location}/models/{modelId}";

            BatchTranslateTextRequest request = new BatchTranslateTextRequest
            {
                ParentAsLocationName = new LocationName(projectId, location),
                SourceLanguageCode = sourceLanguage,
                TargetLanguageCodes =
                {
                    // Target language code.
                    targetLanguage,
                },
                InputConfigs =
                {
                    new InputConfig
                    {
                        GcsSource = new GcsSource
                        {
                            InputUri = inputUri,
                        },
                        MimeType = "text/plain",
                    },
                },
                OutputConfig = new OutputConfig
                {
                    GcsDestination = new GcsDestination
                    {
                        OutputUriPrefix = outputUri,
                    },
                },
                Models =
                {
                    // A model is used for Translation.
                    { targetLanguage,  modelPath},
                },
            };
            // Poll until the returned long-running operation is completed.
            BatchTranslateResponse response = translationServiceClient.BatchTranslateText(request).PollUntilCompleted().Result;
            Console.WriteLine($"Total Characters: {response.TotalCharacters}");
            Console.WriteLine($"Translated Characters: {response.TranslatedCharacters}");
        }
    }

Go

Before trying this sample, follow the Go setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Go API reference documentation.

import (
	"context"
	"fmt"
	"io"

	translate "cloud.google.com/go/translate/apiv3"
	translatepb "google.golang.org/genproto/googleapis/cloud/translate/v3"
)

// batchTranslateTextWithModel translates a large volume of text in asynchronous batch mode.
func batchTranslateTextWithModel(w io.Writer, projectID string, location string, inputURI string, outputURI string, sourceLang string, targetLang string, modelID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// inputURI := "gs://cloud-samples-data/text.txt"
	// outputURI := "gs://YOUR_BUCKET_ID/path_to_store_results/"
	// sourceLang := "en"
	// targetLang := "de"
	// modelID := "your-model-id"

	ctx := context.Background()
	client, err := translate.NewTranslationClient(ctx)
	if err != nil {
		return fmt.Errorf("NewTranslationClient: %v", err)
	}
	defer client.Close()

	req := &translatepb.BatchTranslateTextRequest{
		Parent:              fmt.Sprintf("projects/%s/locations/%s", projectID, location),
		SourceLanguageCode:  sourceLang,
		TargetLanguageCodes: []string{targetLang},
		InputConfigs: []*translatepb.InputConfig{
			{
				Source: &translatepb.InputConfig_GcsSource{
					GcsSource: &translatepb.GcsSource{InputUri: inputURI},
				},
				// Optional. Can be "text/plain" or "text/html".
				MimeType: "text/plain",
			},
		},
		OutputConfig: &translatepb.OutputConfig{
			Destination: &translatepb.OutputConfig_GcsDestination{
				GcsDestination: &translatepb.GcsDestination{
					OutputUriPrefix: outputURI,
				},
			},
		},
		Models: map[string]string{
			targetLang: fmt.Sprintf("projects/%s/locations/%s/models/%s", projectID, location, modelID),
		},
	}

	// The BatchTranslateText operation is async.
	op, err := client.BatchTranslateText(ctx, req)
	if err != nil {
		return fmt.Errorf("BatchTranslateText: %v", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("Wait: %v", err)
	}

	fmt.Fprintf(w, "Total characters: %v\n", resp.GetTotalCharacters())
	fmt.Fprintf(w, "Translated characters: %v\n", resp.GetTranslatedCharacters())

	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Java API reference documentation.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.translate.v3.BatchTranslateMetadata;
import com.google.cloud.translate.v3.BatchTranslateResponse;
import com.google.cloud.translate.v3.BatchTranslateTextRequest;
import com.google.cloud.translate.v3.GcsDestination;
import com.google.cloud.translate.v3.GcsSource;
import com.google.cloud.translate.v3.InputConfig;
import com.google.cloud.translate.v3.LocationName;
import com.google.cloud.translate.v3.OutputConfig;
import com.google.cloud.translate.v3.TranslationServiceClient;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class BatchTranslateTextWithModel {

  public static void batchTranslateTextWithModel()
      throws InterruptedException, ExecutionException, IOException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR-PROJECT-ID";
    // Supported Languages: https://cloud.google.com/translate/docs/languages
    String sourceLanguage = "your-source-language";
    String targetLanguage = "your-target-language";
    String inputUri = "gs://your-gcs-bucket/path/to/input/file.txt";
    String outputUri = "gs://your-gcs-bucket/path/to/results/";
    String modelId = "YOUR-MODEL-ID";
    batchTranslateTextWithModel(
        projectId, sourceLanguage, targetLanguage, inputUri, outputUri, modelId);
  }

  // Batch translate text using AutoML Translation model
  public static void batchTranslateTextWithModel(
      String projectId,
      String sourceLanguage,
      String targetLanguage,
      String inputUri,
      String outputUri,
      String modelId)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (TranslationServiceClient client = TranslationServiceClient.create()) {
      // Supported Locations: `global`, [glossary location], or [model location]
      // Glossaries must be hosted in `us-central1`
      // Custom Models must use the same location as your model. (us-central1)
      String location = "us-central1";
      LocationName parent = LocationName.of(projectId, location);

      // Configure the source of the file from a GCS bucket
      GcsSource gcsSource = GcsSource.newBuilder().setInputUri(inputUri).build();
      // Supported Mime Types: https://cloud.google.com/translate/docs/supported-formats
      InputConfig inputConfig =
          InputConfig.newBuilder().setGcsSource(gcsSource).setMimeType("text/plain").build();

      // Configure where to store the output in a GCS bucket
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(outputUri).build();
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      // Configure the model used in the request
      String modelPath =
          String.format("projects/%s/locations/%s/models/%s", projectId, location, modelId);

      // Build the request that will be sent to the API
      BatchTranslateTextRequest request =
          BatchTranslateTextRequest.newBuilder()
              .setParent(parent.toString())
              .setSourceLanguageCode(sourceLanguage)
              .addTargetLanguageCodes(targetLanguage)
              .addInputConfigs(inputConfig)
              .setOutputConfig(outputConfig)
              .putModels(targetLanguage, modelPath)
              .build();

      // Start an asynchronous request
      OperationFuture<BatchTranslateResponse, BatchTranslateMetadata> future =
          client.batchTranslateTextAsync(request);

      System.out.println("Waiting for operation to complete...");

      // random number between 300 - 450 (maximum allowed seconds)
      long randomNumber = ThreadLocalRandom.current().nextInt(300, 450);
      BatchTranslateResponse response = future.get(randomNumber, TimeUnit.SECONDS);

      // Display the translation for each input text provided
      System.out.printf("Total Characters: %s\n", response.getTotalCharacters());
      System.out.printf("Translated Characters: %s\n", response.getTranslatedCharacters());
    }
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Node.js API reference documentation.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const inputUri = 'gs://cloud-samples-data/text.txt';
// const outputUri = 'gs://YOUR_BUCKET_ID/path_to_store_results/';
// const modelId = 'YOUR_MODEL_ID';

// Imports the Google Cloud Translation library
const {TranslationServiceClient} = require('@google-cloud/translate');

// Instantiates a client
const client = new TranslationServiceClient();
async function batchTranslateTextWithModel() {
  // Construct request
  const request = {
    parent: `projects/${projectId}/locations/${location}`,
    sourceLanguageCode: 'en',
    targetLanguageCodes: ['ja'],
    inputConfigs: [
      {
        mimeType: 'text/plain', // mime types: text/plain, text/html
        gcsSource: {
          inputUri: inputUri,
        },
      },
    ],
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: outputUri,
      },
    },
    models: {
      ja: `projects/${projectId}/locations/${location}/models/${modelId}`,
    },
  };

  try {
    const options = {timeout: 180000};
    // Create a job using a long-running operation
    const [operation] = await client.batchTranslateText(request, options);

    // Wait for the operation to complete
    const [response] = await operation.promise();

    // Display the translation for each input text provided
    console.log(`Total Characters: ${response.totalCharacters}`);
    console.log(`Translated Characters: ${response.translatedCharacters}`);
  } catch (error) {
    console.error(error.details);
  }
}

batchTranslateTextWithModel();

PHP

Before trying this sample, follow the PHP setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation PHP API reference documentation.

use Google\Cloud\Translate\V3\GcsDestination;
use Google\Cloud\Translate\V3\GcsSource;
use Google\Cloud\Translate\V3\InputConfig;
use Google\Cloud\Translate\V3\OutputConfig;
use Google\Cloud\Translate\V3\TranslationServiceClient;

$translationServiceClient = new TranslationServiceClient();

/** Uncomment and populate these variables in your code */
// $inputUri = 'gs://cloud-samples-data/text.txt';
// $outputUri = 'gs://YOUR_BUCKET_ID/path_to_store_results/';
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $targetLanguage = 'en';
// $sourceLanguage = 'de';
// $modelId = '{your-model-id}';
$modelPath = sprintf(
    'projects/%s/locations/%s/models/%s',
    $projectId,
    $location,
    $modelId
);
$targetLanguageCodes = [$targetLanguage];
$gcsSource = (new GcsSource())
    ->setInputUri($inputUri);

// Optional. Can be "text/plain" or "text/html".
$mimeType = 'text/plain';
$inputConfigsElement = (new InputConfig())
    ->setGcsSource($gcsSource)
    ->setMimeType($mimeType);
$inputConfigs = [$inputConfigsElement];
$gcsDestination = (new GcsDestination())
    ->setOutputUriPrefix($outputUri);
$outputConfig = (new OutputConfig())
    ->setGcsDestination($gcsDestination);
$formattedParent = $translationServiceClient->locationName($projectId, $location);
$models = ['ja' => $modelPath];

try {
    $operationResponse = $translationServiceClient->batchTranslateText(
        $formattedParent,
        $sourceLanguage,
        $targetLanguageCodes,
        $inputConfigs,
        $outputConfig,
        ['models' => $models]
    );
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $response = $operationResponse->getResult();
        // Display the translation for each input text provided
        printf('Total Characters: %s' . PHP_EOL, $response->getTotalCharacters());
        printf('Translated Characters: %s' . PHP_EOL, $response->getTranslatedCharacters());
    } else {
        $error = $operationResponse->getError();
        print($error->getMessage());
    }
} finally {
    $translationServiceClient->close();
}

Python

Before trying this sample, follow the Python setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Python API reference documentation.

from google.cloud import translate


def batch_translate_text_with_model(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    model_id="YOUR_MODEL_ID",
):
    """Batch translate text using Translation model.
    Model can be AutoML or General[built-in] model."""

    client = translate.TranslationServiceClient()

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    gcs_source = {"input_uri": input_uri}
    location = "us-central1"

    input_configs_element = {
        "gcs_source": gcs_source,
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    gcs_destination = {"output_uri_prefix": output_uri}
    output_config = {"gcs_destination": gcs_destination}
    parent = f"projects/{project_id}/locations/{location}"

    model_path = "projects/{}/locations/{}/models/{}".format(
        project_id, location, model_id  # The location of AutoML model.
    )

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    models = {"ja": model_path}  # takes a target lang as key.

    operation = client.batch_translate_text(
        request={
            "parent": parent,
            "source_language_code": "en",
            "target_language_codes": ["ja"],  # Up to 10 language codes here.
            "input_configs": [input_configs_element],
            "output_config": output_config,
            "models": models,
        }
    )

    print("Waiting for operation to complete...")
    response = operation.result()

    # Display the translation for each input text provided.
    print("Total Characters: {}".format(response.total_characters))
    print("Translated Characters: {}".format(response.translated_characters))

Ruby

Before trying this sample, follow the Ruby setup instructions in the Translation Quickstart Using Client Libraries. For more information, see the Translation Ruby API reference documentation.

require "google/cloud/translate"

# input_uri = "gs://cloud-samples-data/text.txt"
# output_uri = "gs://YOUR_BUCKET_ID/path_to_store_results/"
# project_id = "[Google Cloud Project ID]"
# location_id = "[LOCATION ID]"
# model_id = "[MODEL ID]"

source_lang = "en"
target_lang = "ja"
# Optional. Can be "text/plain" or "text/html".
mime_type = "text/plain"

client = Google::Cloud::Translate.translation_service

parent = client.location_path project: project_id, location: location_id
model = "projects/#{project_id}/locations/#{location_id}/models/#{model_id}"
models = { target_lang => model }
input_config = {
  mime_type:  mime_type,
  gcs_source: { input_uri: input_uri }
}
output_config = {
  gcs_destination: { output_uri_prefix: output_uri }
}

operation = client.batch_translate_text(
  parent:                parent,
  source_language_code:  source_lang,
  target_language_codes: [target_lang],
  input_configs:         [input_config],
  output_config:         output_config,
  models:                models
)

# Wait until the long running operation is done
operation.wait_until_done!

response = operation.response

puts "Total Characters: #{response.total_characters}"
puts "Translated Characters: #{response.translated_characters}"