Criar um job de rotulagem de dados

Cria um job de rotulagem de dados usando o método create_data_labeling_job.

Mais informações

Para ver a documentação detalhada que inclui este exemplo de código, consulte:

Exemplo de código

Java

Antes de testar esse exemplo, siga as instruções de configuração para Java no Guia de início rápido da Vertex AI sobre como usar bibliotecas de cliente. Para mais informações, consulte a documentação de referência da API Vertex AI para Java.

Para autenticar na Vertex AI, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.


import com.google.cloud.aiplatform.v1.DataLabelingJob;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.JobServiceClient;
import com.google.cloud.aiplatform.v1.JobServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import com.google.type.Money;
import java.io.IOException;
import java.util.Map;

public class CreateDataLabelingJobSample {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String displayName = "YOUR_DATA_LABELING_DISPLAY_NAME";
    String datasetId = "YOUR_DATASET_ID";
    String instructionUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_data_labeling_source/file.pdf";
    String inputsSchemaUri = "YOUR_INPUT_SCHEMA_URI";
    String annotationSpec = "YOUR_ANNOTATION_SPEC";
    createDataLabelingJob(
        project, displayName, datasetId, instructionUri, inputsSchemaUri, annotationSpec);
  }

  static void createDataLabelingJob(
      String project,
      String displayName,
      String datasetId,
      String instructionUri,
      String inputsSchemaUri,
      String annotationSpec)
      throws IOException {
    JobServiceSettings jobServiceSettings =
        JobServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (JobServiceClient jobServiceClient = JobServiceClient.create(jobServiceSettings)) {
      String location = "us-central1";
      LocationName locationName = LocationName.of(project, location);

      String jsonString = "{\"annotation_specs\": [ " + annotationSpec + "]}";
      Value.Builder annotationSpecValue = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, annotationSpecValue);

      DatasetName datasetName = DatasetName.of(project, location, datasetId);
      DataLabelingJob dataLabelingJob =
          DataLabelingJob.newBuilder()
              .setDisplayName(displayName)
              .setLabelerCount(1)
              .setInstructionUri(instructionUri)
              .setInputsSchemaUri(inputsSchemaUri)
              .addDatasets(datasetName.toString())
              .setInputs(annotationSpecValue)
              .putAnnotationLabels(
                  "aiplatform.googleapis.com/annotation_set_name", "my_test_saved_query")
              .build();

      DataLabelingJob dataLabelingJobResponse =
          jobServiceClient.createDataLabelingJob(locationName, dataLabelingJob);

      System.out.println("Create Data Labeling Job Response");
      System.out.format("\tName: %s\n", dataLabelingJobResponse.getName());
      System.out.format("\tDisplay Name: %s\n", dataLabelingJobResponse.getDisplayName());
      System.out.format("\tDatasets: %s\n", dataLabelingJobResponse.getDatasetsList());
      System.out.format("\tLabeler Count: %s\n", dataLabelingJobResponse.getLabelerCount());
      System.out.format("\tInstruction Uri: %s\n", dataLabelingJobResponse.getInstructionUri());
      System.out.format("\tInputs Schema Uri: %s\n", dataLabelingJobResponse.getInputsSchemaUri());
      System.out.format("\tInputs: %s\n", dataLabelingJobResponse.getInputs());
      System.out.format("\tState: %s\n", dataLabelingJobResponse.getState());
      System.out.format("\tLabeling Progress: %s\n", dataLabelingJobResponse.getLabelingProgress());
      System.out.format("\tCreate Time: %s\n", dataLabelingJobResponse.getCreateTime());
      System.out.format("\tUpdate Time: %s\n", dataLabelingJobResponse.getUpdateTime());
      System.out.format("\tLabels: %s\n", dataLabelingJobResponse.getLabelsMap());
      System.out.format(
          "\tSpecialist Pools: %s\n", dataLabelingJobResponse.getSpecialistPoolsList());
      for (Map.Entry<String, String> annotationLabelMap :
          dataLabelingJobResponse.getAnnotationLabelsMap().entrySet()) {
        System.out.println("\tAnnotation Level");
        System.out.format("\t\tkey: %s\n", annotationLabelMap.getKey());
        System.out.format("\t\tvalue: %s\n", annotationLabelMap.getValue());
      }
      Money money = dataLabelingJobResponse.getCurrentSpend();

      System.out.println("\tCurrent Spend");
      System.out.format("\t\tCurrency Code: %s\n", money.getCurrencyCode());
      System.out.format("\t\tUnits: %s\n", money.getUnits());
      System.out.format("\t\tNanos: %s\n", money.getNanos());
    }
  }
}

Python

Antes de testar essa amostra, siga as instruções de configuração para Python Guia de início rápido da Vertex AI: como usar bibliotecas de cliente. Para mais informações, consulte a documentação de referência da API Vertex AI para Python.

Para autenticar na Vertex AI, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

def create_data_labeling_job_sample(
    project: str,
    display_name: str,
    dataset_name: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    inputs_dict = {"annotation_specs": [annotation_spec]}
    inputs = json_format.ParseDict(inputs_dict, Value())

    data_labeling_job = {
        "display_name": display_name,
        # Full resource name: projects/{project_id}/locations/{location}/datasets/{dataset_id}
        "datasets": [dataset_name],
        # labeler_count must be 1, 3, or 5
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "my_test_saved_query"
        },
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_data_labeling_job(
        parent=parent, data_labeling_job=data_labeling_job
    )
    print("response:", response)

A seguir

Para pesquisar e filtrar exemplos de código de outros produtos do Google Cloud, consulte a pesquisa de exemplos de código do Google Cloud.