Create a dataset for text

Stay organized with collections Save and categorize content based on your preferences.

Creates a dataset for text using the create_dataset method.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Java API reference documentation.


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetTextSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";

    createDatasetTextSample(project, datasetDisplayName);
  }

  static void createDatasetTextSample(String project, String datasetDisplayName)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml";

      LocationName locationName = LocationName.of(project, location);
      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(180, TimeUnit.SECONDS);

      System.out.println("Create Text Dataset Response");
      System.out.format("\tName: %s\n", datasetResponse.getName());
      System.out.format("\tDisplay Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("\tMetadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("\tMetadata: %s\n", datasetResponse.getMetadata());
      System.out.format("\tCreate Time: %s\n", datasetResponse.getCreateTime());
      System.out.format("\tUpdate Time: %s\n", datasetResponse.getUpdateTime());
      System.out.format("\tLabels: %s\n", datasetResponse.getLabelsMap());
    }
  }
}

Node.js

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Node.js API reference documentation.

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = "YOUR_DATASTE_DISPLAY_NAME";
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetText() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml',
  };
  const request = {
    parent,
    dataset,
  };

  // Create Dataset Request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation: ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset text response');
  console.log(`Name : ${result.name}`);
  console.log(`Display name : ${result.displayName}`);
  console.log(`Metadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`Metadata : ${JSON.stringify(result.metadata)}`);
  console.log(`Labels : ${JSON.stringify(result.labels)}`);
}
createDatasetText();

Python

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Python API reference documentation.

from google.cloud import aiplatform


def create_dataset_text_sample(
    project: str,
    display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 300,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.DatasetServiceClient(client_options=client_options)
    dataset = {
        "display_name": display_name,
        "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml",
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_dataset(parent=parent, dataset=dataset)
    print("Long running operation:", response.operation.name)
    create_dataset_response = response.result(timeout=timeout)
    print("create_dataset_response:", create_dataset_response)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.