Creazione di un job di etichettatura dati

Crea un job di etichettatura dati utilizzando il metodo create_data_labeling_job.

Esempio di codice

Java

Prima di provare questo esempio, segui le istruzioni di configurazione Java riportate nella guida rapida all'utilizzo delle librerie client di Vertex AI. Per ulteriori informazioni, consulta la documentazione di riferimento dell'API Java di Vertex AI.

Per autenticarti in Vertex AI, configura le credenziali predefinite dell'applicazione. Per ulteriori informazioni, vedi Configura l'autenticazione per un ambiente di sviluppo locale.


import com.google.cloud.aiplatform.v1.DataLabelingJob;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.JobServiceClient;
import com.google.cloud.aiplatform.v1.JobServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import com.google.type.Money;
import java.io.IOException;
import java.util.Map;

public class CreateDataLabelingJobSample {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String displayName = "YOUR_DATA_LABELING_DISPLAY_NAME";
    String datasetId = "YOUR_DATASET_ID";
    String instructionUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_data_labeling_source/file.pdf";
    String inputsSchemaUri = "YOUR_INPUT_SCHEMA_URI";
    String annotationSpec = "YOUR_ANNOTATION_SPEC";
    createDataLabelingJob(
        project, displayName, datasetId, instructionUri, inputsSchemaUri, annotationSpec);
  }

  static void createDataLabelingJob(
      String project,
      String displayName,
      String datasetId,
      String instructionUri,
      String inputsSchemaUri,
      String annotationSpec)
      throws IOException {
    JobServiceSettings jobServiceSettings =
        JobServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (JobServiceClient jobServiceClient = JobServiceClient.create(jobServiceSettings)) {
      String location = "us-central1";
      LocationName locationName = LocationName.of(project, location);

      String jsonString = "{\"annotation_specs\": [ " + annotationSpec + "]}";
      Value.Builder annotationSpecValue = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, annotationSpecValue);

      DatasetName datasetName = DatasetName.of(project, location, datasetId);
      DataLabelingJob dataLabelingJob =
          DataLabelingJob.newBuilder()
              .setDisplayName(displayName)
              .setLabelerCount(1)
              .setInstructionUri(instructionUri)
              .setInputsSchemaUri(inputsSchemaUri)
              .addDatasets(datasetName.toString())
              .setInputs(annotationSpecValue)
              .putAnnotationLabels(
                  "aiplatform.googleapis.com/annotation_set_name", "my_test_saved_query")
              .build();

      DataLabelingJob dataLabelingJobResponse =
          jobServiceClient.createDataLabelingJob(locationName, dataLabelingJob);

      System.out.println("Create Data Labeling Job Response");
      System.out.format("\tName: %s\n", dataLabelingJobResponse.getName());
      System.out.format("\tDisplay Name: %s\n", dataLabelingJobResponse.getDisplayName());
      System.out.format("\tDatasets: %s\n", dataLabelingJobResponse.getDatasetsList());
      System.out.format("\tLabeler Count: %s\n", dataLabelingJobResponse.getLabelerCount());
      System.out.format("\tInstruction Uri: %s\n", dataLabelingJobResponse.getInstructionUri());
      System.out.format("\tInputs Schema Uri: %s\n", dataLabelingJobResponse.getInputsSchemaUri());
      System.out.format("\tInputs: %s\n", dataLabelingJobResponse.getInputs());
      System.out.format("\tState: %s\n", dataLabelingJobResponse.getState());
      System.out.format("\tLabeling Progress: %s\n", dataLabelingJobResponse.getLabelingProgress());
      System.out.format("\tCreate Time: %s\n", dataLabelingJobResponse.getCreateTime());
      System.out.format("\tUpdate Time: %s\n", dataLabelingJobResponse.getUpdateTime());
      System.out.format("\tLabels: %s\n", dataLabelingJobResponse.getLabelsMap());
      System.out.format(
          "\tSpecialist Pools: %s\n", dataLabelingJobResponse.getSpecialistPoolsList());
      for (Map.Entry<String, String> annotationLabelMap :
          dataLabelingJobResponse.getAnnotationLabelsMap().entrySet()) {
        System.out.println("\tAnnotation Level");
        System.out.format("\t\tkey: %s\n", annotationLabelMap.getKey());
        System.out.format("\t\tvalue: %s\n", annotationLabelMap.getValue());
      }
      Money money = dataLabelingJobResponse.getCurrentSpend();

      System.out.println("\tCurrent Spend");
      System.out.format("\t\tCurrency Code: %s\n", money.getCurrencyCode());
      System.out.format("\t\tUnits: %s\n", money.getUnits());
      System.out.format("\t\tNanos: %s\n", money.getNanos());
    }
  }
}

Python

Prima di provare questo esempio, segui le istruzioni di configurazione Python riportate nella guida rapida all'utilizzo delle librerie client di Vertex AI. Per ulteriori informazioni, consulta la documentazione di riferimento dell'API Python di Vertex AI.

Per autenticarti in Vertex AI, configura le credenziali predefinite dell'applicazione. Per maggiori informazioni, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_data_labeling_job_sample(
    project: str,
    display_name: str,
    dataset_name: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    inputs_dict = {"annotation_specs": [annotation_spec]}
    inputs = json_format.ParseDict(inputs_dict, Value())

    data_labeling_job = {
        "display_name": display_name,
        # Full resource name: projects/{project_id}/locations/{location}/datasets/{dataset_id}
        "datasets": [dataset_name],
        # labeler_count must be 1, 3, or 5
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "my_test_saved_query"
        },
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_data_labeling_job(
        parent=parent, data_labeling_job=data_labeling_job
    )
    print("response:", response)

Passaggi successivi

Per cercare ed eseguire filtri sugli esempi di codice per altri prodotti Google Cloud, consulta il browser di esempi di Google Cloud.