Créer un ensemble de données pour Cloud Storage tabulaire

Crée un ensemble de données pour Cloud Storage tabulaire à l'aide de la méthode create_dataset.

En savoir plus

Pour obtenir une documentation détaillée incluant cet exemple de code, consultez ce qui suit :

Exemple de code

Java

Pour savoir comment installer et utiliser la bibliothèque cliente pour Vertex AI, consultez la page Bibliothèques clientes Vertex AI. Pour en savoir plus, consultez la documentation de référence de l'API Vertex AI en langage Java.


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetTabularGcsSample {

  public static void main(String[] args)
      throws InterruptedException, ExecutionException, TimeoutException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";
    String gcsSourceUri = "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_gcs_table/file.csv";
    ;
    createDatasetTableGcs(project, datasetDisplayName, gcsSourceUri);
  }

  static void createDatasetTableGcs(String project, String datasetDisplayName, String gcsSourceUri)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    DatasetServiceSettings settings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient = DatasetServiceClient.create(settings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/tables_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);

      String jsonString =
          "{\"input_config\": {\"gcs_source\": {\"uri\": [\"" + gcsSourceUri + "\"]}}}";
      Value.Builder metaData = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, metaData);

      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .setMetadata(metaData)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(300, TimeUnit.SECONDS);

      System.out.println("Create Dataset Table GCS sample");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
    }
  }
}

Node.js

Pour savoir comment installer et utiliser la bibliothèque cliente pour Vertex AI, consultez la page Bibliothèques clientes Vertex AI. Pour en savoir plus, consultez la documentation de référence de l'API Vertex AI en langage Node.js.

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = 'YOUR_DATASET_DISPLAY_NAME';
// const gcsSourceUri = 'YOUR_GCS_SOURCE_URI';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetTabularGcs() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  const metadata = {
    structValue: {
      fields: {
        inputConfig: {
          structValue: {
            fields: {
              gcsSource: {
                structValue: {
                  fields: {
                    uri: {
                      listValue: {
                        values: [{stringValue: gcsSourceUri}],
                      },
                    },
                  },
                },
              },
            },
          },
        },
      },
    },
  };
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml',
    metadata: metadata,
  };
  const request = {
    parent,
    dataset,
  };

  // Create dataset request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset tabular gcs response');
  console.log(`\tName : ${result.name}`);
  console.log(`\tDisplay name : ${result.displayName}`);
  console.log(`\tMetadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`\tMetadata : ${JSON.stringify(result.metadata)}`);
}
createDatasetTabularGcs();

Python

Pour savoir comment installer et utiliser la bibliothèque cliente pour Vertex AI, consultez la page Bibliothèques clientes Vertex AI. Pour en savoir plus, consultez la documentation de référence de l'API Vertex AI en langage Python.

from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

def create_dataset_tabular_gcs_sample(
    project: str,
    display_name: str,
    gcs_uri: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 300,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.DatasetServiceClient(client_options=client_options)
    metadata_dict = {"input_config": {"gcs_source": {"uri": [gcs_uri]}}}
    metadata = json_format.ParseDict(metadata_dict, Value())

    dataset = {
        "display_name": display_name,
        "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml",
        "metadata": metadata,
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_dataset(parent=parent, dataset=dataset)
    print("Long running operation:", response.operation.name)
    create_dataset_response = response.result(timeout=timeout)
    print("create_dataset_response:", create_dataset_response)

Étape suivante

Pour rechercher et filtrer des exemples de code pour d'autres produits Google Cloud, consultez l'explorateur d'exemples Google Cloud.