Importazione di dati per l'estrazione delle entità di testo

importa i dati per l'estrazione delle entità di testo utilizzando il metodo import_data.

Per saperne di più

Per la documentazione dettagliata che include questo esempio di codice, consulta quanto segue:

Esempio di codice

Java

Per informazioni su come installare e utilizzare la libreria client per Vertex AI, consulta le librerie client di Vertex AI. Per maggiori informazioni, consulta la documentazione di riferimento dell'API Java di AI AI.


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataTextEntityExtractionSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri = "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_text_source/[file.jsonl]";

    importDataTextEntityExtractionSample(project, datasetId, gcsSourceUri);
  }

  static void importDataTextEntityExtractionSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "text_extraction_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);
      System.out.format(
          "Import Data Text Entity Extraction Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

Per informazioni su come installare e utilizzare la libreria client per Vertex AI, consulta le librerie client di Vertex AI. Per ulteriori informazioni, consulta la documentazione di riferimento dell'API Vertex AI.js.

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataTextEntityExtraction() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/text_extraction_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Import data request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data text entity extraction response  : \
      ${JSON.stringify(importDataResponse.result)}`
  );
}
importDataTextEntityExtraction();

Python

Per informazioni su come installare e utilizzare la libreria client per Vertex AI, consulta le librerie client di Vertex AI. Per maggiori informazioni, consulta la documentazione di riferimento dell'API Python AI Vertex.

from google.cloud import aiplatform

def import_data_text_entity_extraction_sample(
    project: str,
    dataset_id: str,
    gcs_source_uri: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 1800,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.DatasetServiceClient(client_options=client_options)
    import_configs = [
        {
            "gcs_source": {"uris": [gcs_source_uri]},
            "import_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_extraction_io_format_1.0.0.yaml",
        }
    ]
    name = client.dataset_path(project=project, location=location, dataset=dataset_id)
    response = client.import_data(name=name, import_configs=import_configs)
    print("Long running operation:", response.operation.name)
    import_data_response = response.result(timeout=timeout)
    print("import_data_response:", import_data_response)

Passaggi successivi

Per cercare e filtrare esempi di codice per altri prodotti Google Cloud, vedi il browser di esempio Google Cloud.