This legacy version of AI Platform Data Labeling is deprecated and will no longer be available on Google Cloud after January 23, 2024. All the functionality of legacy AI Platform Data Labeling and new features are available on the Vertex AI platform. See Migrate to Vertex AI to learn how to migrate your resources.

Label text

Stay organized with collections. Save and categorize content based on your preferences.

Start a text labeling task.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

To learn how to install and use the client library for Data Labeling Service, see Data Labeling Service client libraries. For more information, see the Data Labeling Service Java API reference documentation.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.datalabeling.v1beta1.AnnotatedDataset;
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceClient;
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceSettings;
import com.google.cloud.datalabeling.v1beta1.HumanAnnotationConfig;
import com.google.cloud.datalabeling.v1beta1.LabelOperationMetadata;
import com.google.cloud.datalabeling.v1beta1.LabelTextRequest;
import com.google.cloud.datalabeling.v1beta1.LabelTextRequest.Feature;
import com.google.cloud.datalabeling.v1beta1.SentimentConfig;
import com.google.cloud.datalabeling.v1beta1.TextClassificationConfig;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

/** Sample showing how to start a text-classification labeling task. */
class LabelText {

  /**
   * Starts a text labeling task and then immediately cancels the resulting operation.
   *
   * <p>The request uses {@code Feature.TEXT_CLASSIFICATION} with sentiment selection disabled.
   * Exceptions raised while creating the client or talking to the service are printed to
   * standard error rather than rethrown.
   *
   * @param formattedInstructionName resource name of the instruction to attach to the task
   * @param formattedAnnotationSpecSetName resource name of the annotation spec set to label with
   * @param formattedDatasetName resource name of the dataset to label (used as the request parent)
   * @throws IOException if building the service settings fails
   */
  static void labelText(
      String formattedInstructionName,
      String formattedAnnotationSpecSetName,
      String formattedDatasetName)
      throws IOException {
    // Example argument values:
    // String formattedInstructionName = DataLabelingServiceClient.formatInstructionName(
    //      "YOUR_PROJECT_ID", "YOUR_INSTRUCTION_UUID");
    // String formattedAnnotationSpecSetName =
    //     DataLabelingServiceClient.formatAnnotationSpecSetName(
    //         "YOUR_PROJECT_ID", "YOUR_ANNOTATION_SPEC_SET_UUID");
    // String formattedDatasetName = DataLabelingServiceClient.formatDatasetName(
    //      "YOUR_PROJECT_ID", "YOUR_DATASET_UUID");

    DataLabelingServiceSettings settings = DataLabelingServiceSettings.newBuilder().build();

    try (DataLabelingServiceClient dataLabelingServiceClient =
        DataLabelingServiceClient.create(settings)) {
      HumanAnnotationConfig humanAnnotationConfig =
          HumanAnnotationConfig.newBuilder()
              .setAnnotatedDatasetDisplayName("annotated_displayname")
              .setAnnotatedDatasetDescription("annotated_description")
              .setLanguageCode("en-us")
              .setInstruction(formattedInstructionName)
              .build();

      SentimentConfig sentimentConfig =
          SentimentConfig.newBuilder().setEnableLabelSentimentSelection(false).build();

      TextClassificationConfig textClassificationConfig =
          TextClassificationConfig.newBuilder()
              .setAnnotationSpecSet(formattedAnnotationSpecSetName)
              .setSentimentConfig(sentimentConfig)
              .build();

      LabelTextRequest labelTextRequest =
          LabelTextRequest.newBuilder()
              .setParent(formattedDatasetName)
              .setBasicConfig(humanAnnotationConfig)
              .setTextClassificationConfig(textClassificationConfig)
              .setFeature(Feature.TEXT_CLASSIFICATION)
              .build();

      OperationFuture<AnnotatedDataset, LabelOperationMetadata> operation =
          dataLabelingServiceClient.labelTextAsync(labelTextRequest);

      // You'll want to save this for later to retrieve your completed operation.
      // System.out.format("Operation Name: %s\n", operation.getName());

      // Cancel the operation to avoid charges when testing.
      dataLabelingServiceClient.getOperationsClient().cancelOperation(operation.getName());

    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (IOException | ExecutionException e) {
      e.printStackTrace();
    }
  }
}

Python

To learn how to install and use the client library for Data Labeling Service, see Data Labeling Service client libraries. For more information, see the Data Labeling Service Python API reference documentation.

def label_text(
    dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name
):
    """Starts a text entity-extraction labeling task on a dataset.

    Args:
        dataset_resource_name: Resource name of the dataset to label; sent as
            the request's ``parent``.
        instruction_resource_name: Resource name of the instruction attached to
            the human annotation config.
        annotation_spec_set_resource_name: Resource name of the annotation spec
            set used by the entity-extraction config.

    Returns:
        Whatever ``DataLabelingServiceClient.label_text`` returns; its
        ``operation.name`` is printed before returning.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()

    basic_config = datalabeling.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME",
        label_group="YOUR_LABEL_GROUP",
        replica_count=1,
    )

    feature = datalabeling.LabelTextRequest.Feature.TEXT_ENTITY_EXTRACTION

    config = datalabeling.TextEntityExtractionConfig(
        annotation_spec_set=annotation_spec_set_resource_name
    )

    response = client.label_text(
        request={
            "parent": dataset_resource_name,
            "basic_config": basic_config,
            "feature": feature,
            # The config field must match the chosen feature: an entity-extraction
            # config belongs in text_entity_extraction_config, not in
            # text_classification_config.
            "text_entity_extraction_config": config,
        }
    )

    print("Label_text operation name: {}".format(response.operation.name))
    return response

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.