This legacy version of AutoML Tables is deprecated and will no longer be available on Google Cloud after January 23, 2024. All the functionality of legacy AutoML Tables and new features are available on the Vertex AI platform. See Migrate to Vertex AI to learn how to migrate your resources.

Create a dataset

Stay organized with collections. Save and categorize content based on your preferences.

Demonstrates how to create a dataset.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

import com.google.cloud.automl.v1beta1.AutoMlClient;
import com.google.cloud.automl.v1beta1.Dataset;
import com.google.cloud.automl.v1beta1.LocationName;
import com.google.cloud.automl.v1beta1.TablesDatasetMetadata;
import java.io.IOException;

/** Sample that creates an AutoML Tables dataset and prints its resource name and id. */
class TablesCreateDataset {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String displayName = "YOUR_DATASET_NAME";
    createDataset(projectId, displayName);
  }

  /**
   * Creates a Tables dataset with default metadata in the {@code us-central1} location of the
   * given project, then prints the created dataset's resource name and id to standard output.
   *
   * @param projectId id of the project under which the dataset is created
   * @param displayName display name assigned to the new dataset
   * @throws IOException propagated from the AutoML client calls made by this method
   */
  static void createDataset(String projectId, String displayName) throws IOException {
    // try-with-resources closes the client when the block exits, releasing whatever background
    // resources it holds. A single client can serve multiple requests before being closed.
    try (AutoMlClient automlClient = AutoMlClient.create()) {
      // Parent resource (project + location) under which the dataset is created.
      LocationName parent = LocationName.of(projectId, "us-central1");

      // Default (empty) Tables metadata marks the dataset as a Tables dataset.
      TablesDatasetMetadata tablesMetadata = TablesDatasetMetadata.newBuilder().build();
      Dataset request =
          Dataset.newBuilder()
              .setDisplayName(displayName)
              .setTablesDatasetMetadata(tablesMetadata)
              .build();

      Dataset response = automlClient.createDataset(parent, request);

      // Resource name form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
      String resourceName = response.getName();
      System.out.format("Dataset name: %s%n", resourceName);

      // Other methods take the dataset id, which is only available as the final path segment of
      // the resource name.
      String[] segments = resourceName.split("/");
      System.out.format("Dataset id: %s%n", segments[segments.length - 1]);
    }
  }
}

Node.js

const automl = require('@google-cloud/automl');
const util = require('util');
const client = new automl.v1beta1.AutoMlClient();

/**
 * Demonstrates using the AutoML client to create a dataset.
 * TODO(developer): Uncomment the following lines and set the values before
 * running the sample.
 */
// const projectId = '[PROJECT_ID]'; // e.g., 'my-gcloud-project'
// const computeRegion = '[REGION_NAME]'; // e.g., 'us-central1'
// const datasetName = '[DATASET_NAME]'; // e.g., 'myDataset'

// A resource that represents Google Cloud Platform location.
const projectLocation = client.locationPath(projectId, computeRegion);

// Set dataset name and metadata. The empty `tablesDatasetMetadata` object
// requests a Tables dataset with default metadata.
const myDataset = {
  displayName: datasetName,
  tablesDatasetMetadata: {},
};

// Create a dataset with the dataset metadata in the region.
client
  .createDataset({parent: projectLocation, dataset: myDataset})
  .then(responses => {
    // The created dataset is the first element of the resolved array.
    const dataset = responses[0];
    // Display the dataset information.
    console.log(`Dataset name: ${dataset.name}`);
    // The dataset id is the last '/'-separated segment of the resource name.
    // `pop()` takes no arguments; it always removes and returns the last item.
    console.log(`Dataset Id: ${dataset.name.split('/').pop()}`);
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log(`Dataset example count: ${dataset.exampleCount}`);
    // util.inspect(object, showHidden, depth): a `null` depth expands nested
    // objects without a depth limit.
    console.log(
      `Tables dataset metadata: ${util.inspect(
        dataset.tablesDatasetMetadata,
        false,
        null
      )}`
    );
  })
  .catch(err => {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(err);
  });

Python

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'

from google.cloud import automl_v1beta1 as automl

# Build a Tables client bound to the target project and region.
client = automl.TablesClient(project=project_id, region=compute_region)

# Ask the service for a new dataset carrying the requested display name.
dataset = client.create_dataset(dataset_display_name)

# Assemble the report lines in output order. The dataset id is the last
# "/"-separated segment of the dataset's resource name.
report_lines = [
    "Dataset name: {}".format(dataset.name),
    "Dataset id: {}".format(dataset.name.split("/")[-1]),
    "Dataset display name: {}".format(dataset.display_name),
    "Dataset metadata:",
    "\t{}".format(dataset.tables_dataset_metadata),
    "Dataset example count: {}".format(dataset.example_count),
    "Dataset create time: {}".format(dataset.create_time),
]

# Display the dataset information, one line per print call.
for report_line in report_lines:
    print(report_line)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.