Classify content of a Cloud Storage file

Analyze a file stored in Google Cloud Storage and return a list of content categories that apply to the text found in the document

Code sample


func classifyTextFromGCS(ctx context.Context, gcsURI string) (*languagepb.ClassifyTextResponse, error) {
	return client.ClassifyText(ctx, &languagepb.ClassifyTextRequest{
		Document: &languagepb.Document{
			Source: &languagepb.Document_GcsContentUri{
				GcsContentUri: gcsURI,
			Type: languagepb.Document_PLAIN_TEXT,


// Instantiate the Language client
try (LanguageServiceClient language = LanguageServiceClient.create()) {
  // set the GCS content URI path
  Document doc =
  ClassifyTextRequest request = ClassifyTextRequest.newBuilder().setDocument(doc).build();
  // detect categories in the given file
  ClassifyTextResponse response = language.classifyText(request);

  for (ClassificationCategory category : response.getCategoriesList()) {
        "Category name : %s, Confidence : %.3f\n",
        category.getName(), category.getConfidence());


// Imports the Google Cloud client library.
const language = require('@google-cloud/language');

// Creates a client.
const client = new language.LanguageServiceClient();

 * TODO(developer): Uncomment the following lines to run this code
// const bucketName = 'Your bucket name, e.g. my-bucket';
// const fileName = 'Your file name, e.g. my-file.txt';

// Prepares a document, representing a text file in Cloud Storage
const document = {
  gcsContentUri: `gs://${bucketName}/${fileName}`,
  type: 'PLAIN_TEXT',

// Classifies text in the document
const [classification] = await client.classifyText({document});

classification.categories.forEach(category => {
  console.log(`Name: ${}, Confidence: ${category.confidence}`);


from import language_v1

def sample_classify_text(gcs_content_uri):
    Classifying Content in text file stored in Cloud Storage

      gcs_content_uri Google Cloud Storage URI where the file content is located.
      e.g. gs://[Your Bucket]/[Path to File]
      The text file must include at least 20 words.

    client = language_v1.LanguageServiceClient()

    # gcs_content_uri = 'gs://cloud-samples-data/language/classify-entertainment.txt'

    # Available types: PLAIN_TEXT, HTML
    type_ = language_v1.Document.Type.PLAIN_TEXT

    # Optional. If not specified, the language is automatically detected.
    # For list of supported languages:
    language = "en"
    document = {"gcs_content_uri": gcs_content_uri, "type_": type_, "language": language}

    response = client.classify_text(request = {'document': document})
    # Loop through classified categories returned from the API
    for category in response.categories:
        # Get the name of the category representing the document.
        # See the predefined taxonomy of categories:
        print(u"Category name: {}".format(
        # Get the confidence. Number representing how certain the classifier
        # is that this category represents the provided text.
        print(u"Confidence: {}".format(category.confidence))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser