Managing datasets

This page describes how to delete and get information about your datasets.

For information about creating a dataset and importing data into it, see Creating datasets and importing data.

Before you begin

Before you can use AutoML Tables, you must have set up your project as described in Before you begin.

Listing datasets

A project can include numerous datasets. This section describes how to retrieve a list of the available datasets for a project.

Console

To see a list of the available datasets using the AutoML Tables UI, click the Datasets link at the top of the left navigation menu and select the Region.

REST

To list your datasets you use the datasets.list method.

Before using any of the request data, make the following replacements:

  • endpoint: automl.googleapis.com for the global location, and eu-automl.googleapis.com for the EU region.
  • project-id: your Google Cloud project ID.
  • location: the location for the resource: us-central1 for Global or eu for the European Union.

HTTP method and URL:

GET https://endpoint/v1beta1/projects/project-id/locations/location/datasets

To send your request, choose one of these options:

curl

Execute the following command:

curl -X GET \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
"https://endpoint/v1beta1/projects/project-id/locations/location/datasets"

PowerShell

Execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://endpoint/v1beta1/projects/project-id/locations/location/datasets" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
      "name": "projects/29434381/locations/us-central1/datasets/TBL75559",
      "displayName": "test_dataset",
      "createTime": "2019-03-21T00:50:20.660378Z",
      "updateTime": "2019-08-23T19:32:52.025469Z",
      "etag": "AB3BwFoV4USmhM3pT8c6Y5AIA6n51dAmSuObc=",
      "exampleCount": 94356,
      "tablesDatasetMetadata": {
        "primaryTableSpecId": "16930321664",
        "targetColumnSpecId": "46579780096",
        "areStatsFresh": true,
        "targetColumnCorrelations": {
          "6788648672679690240": {
            "cramersV": 0.16511808788616378
          },
          "87292427152392192": {
            "cramersV": 0.20327159375043746
          },
          "2393135436366086144": {
            "cramersV": 0.15513206308654948
          },
          "9094491681893384192": {
            "cramersV": 0.021499396246101456
          },
          "7004821454793474048": {
            "cramersV": 0.030097587339321379
          }
        },
        "statsUpdateTime": "2019-08-16T01:43:38.583Z",
        "tablesDatasetType": "BASIC"
      }
    },
...

Java

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

import com.google.cloud.automl.v1beta1.AutoMlClient;
import com.google.cloud.automl.v1beta1.Dataset;
import com.google.cloud.automl.v1beta1.ListDatasetsRequest;
import com.google.cloud.automl.v1beta1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("%nDataset name: %s%n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s%n", retrievedDatasetId);
        System.out.format("Dataset display name: %s%n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s%n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s%n", dataset.getCreateTime().getNanos());

        System.out.format("Tables dataset metadata: %s%n", dataset.getTablesDatasetMetadata());

      }
    }
  }
}

Node.js

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

const automl = require('@google-cloud/automl');
const util = require('util');
const client = new automl.v1beta1.AutoMlClient();

/**
 * Demonstrates using the AutoML client to list all datasets.
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const projectId = '[PROJECT_ID]' e.g., "my-gcloud-project";
// const computeRegion = '[REGION_NAME]' e.g., "us-central1";
// const filter = '[FILTER_EXPRESSIONS]' e.g., "tablesDatasetMetadata:*";

// A resource that represents Google Cloud Platform location.
const projectLocation = client.locationPath(projectId, computeRegion);

// List all the datasets available in the region by applying filter.
client
  .listDatasets({parent: projectLocation, filter: filter})
  .then(responses => {
    const dataset = responses[0];

    // Display the dataset information.
    console.log('List of datasets:');
    for (let i = 0; i < dataset.length; i++) {
      const tablesDatasetMetadata = dataset[i].tablesDatasetMetadata;

      console.log(`Dataset name: ${dataset[i].name}`);
      console.log(`Dataset Id: ${dataset[i].name.split('/').pop(-1)}`);
      console.log(`Dataset display name: ${dataset[i].displayName}`);
      console.log(`Dataset example count: ${dataset[i].exampleCount}`);
      console.log('Tables dataset metadata:');
      console.log(
        `\tTarget column correlations: ${util.inspect(
          tablesDatasetMetadata.targetColumnCorrelations,
          false,
          null
        )}`
      );
      console.log(
        `\tPrimary table spec Id: ${tablesDatasetMetadata.primaryTableSpecId}`
      );
      console.log(
        `\tTarget column spec Id: ${tablesDatasetMetadata.targetColumnSpecId}`
      );
      console.log(
        `\tWeight column spec Id: ${tablesDatasetMetadata.weightColumnSpecId}`
      );
      console.log(
        `\tMl use column spec Id: ${tablesDatasetMetadata.mlUseColumnSpecId}`
      );
    }
  })
  .catch(err => {
    console.error(err);
  });

Python

The client library for AutoML Tables includes additional Python methods that simplify using the AutoML Tables API. These methods refer to datasets and models by name instead of id. Your dataset and model names must be unique. For more information, see the Client reference.

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# filter = 'filter expression here'

from google.cloud import automl_v1beta1 as automl

client = automl.TablesClient(project=project_id, region=compute_region)

# List all the datasets available in the region by applying filter.
response = client.list_datasets(filter=filter)

print("List of datasets:")
for dataset in response:
    # Display the dataset information.
    print(f"Dataset name: {dataset.name}")
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print(f"Dataset display name: {dataset.display_name}")
    metadata = dataset.tables_dataset_metadata
    print(
        "Dataset primary table spec id: {}".format(metadata.primary_table_spec_id)
    )
    print(
        "Dataset target column spec id: {}".format(metadata.target_column_spec_id)
    )
    print(
        "Dataset target column spec id: {}".format(metadata.target_column_spec_id)
    )
    print(
        "Dataset weight column spec id: {}".format(metadata.weight_column_spec_id)
    )
    print(
        "Dataset ml use column spec id: {}".format(metadata.ml_use_column_spec_id)
    )
    print(f"Dataset example count: {dataset.example_count}")
    print(f"Dataset create time: {dataset.create_time}")
    print("\n")

Deleting a dataset

Deleting a dataset removes the dataset permanently from your project. This operation does not delete any models created from that dataset. If you want to delete the models, you must delete them explicitly.

Console

  1. In the AutoML Tables UI, click the Datasets link at the top of the left navigation menu and select the Region to display the list of available datasets.

  2. Click the more actions menu at the far right of the row you want to delete and select Delete dataset.

    AutoML Tables schema page

  3. Click Confirm in the confirmation dialog box.

REST

To delete a dataset you use the datasets.delete method.

Before using any of the request data, make the following replacements:

  • endpoint: automl.googleapis.com for the global location, and eu-automl.googleapis.com for the EU region.
  • project-id: your Google Cloud project ID.
  • location: the location for the resource: us-central1 for Global or eu for the European Union.
  • dataset-id: the ID of the dataset you want to delete. For example, TBL6543.

HTTP method and URL:

DELETE https://endpoint/v1beta1/projects/project-id/locations/location/datasets/dataset-id

To send your request, choose one of these options:

curl

Execute the following command:

curl -X DELETE \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
"https://endpoint/v1beta1/projects/project-id/locations/location/datasets/dataset-id"

PowerShell

Execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method DELETE `
-Headers $headers `
-Uri "https://endpoint/v1beta1/projects/project-id/locations/location/datasets/dataset-id" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
  "name": "projects/29452381/locations/us-central1/operations/TBL6543",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1beta1.OperationMetadata",
    "createTime": "2019-12-26T17:19:50.684850Z",
    "updateTime": "2019-12-26T17:19:50.684850Z",
    "deleteDetails": {},
    "worksOn": [
      "projects/29452381/locations/us-central1/datasets/TBL6543"
    ],
    "state": "DONE"
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

Deleting a dataset is a long-running operation. You can poll for the operation status or wait for the operation to return. Learn more.

Java

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

import com.google.cloud.automl.v1beta1.AutoMlClient;
import com.google.cloud.automl.v1beta1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s%n", response);
    }
  }
}

Node.js

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

const automl = require('@google-cloud/automl');
const client = new automl.v1beta1.AutoMlClient();

/**
 * Demonstrates using the AutoML client to delete a dataset.
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const projectId = '[PROJECT_ID]' e.g., "my-gcloud-project";
// const computeRegion = '[REGION_NAME]' e.g., "us-central1";
// const datasetId = '[DATASET_ID]' e.g., "TBL2246891593778855936";

// Get the full path of the dataset.
const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

// Delete a dataset.
client
  .deleteDataset({name: datasetFullId})
  .then(responses => {
    const operation = responses[0];
    return operation.promise();
  })
  .then(responses => {
    // The final result of the operation.
    const operationDetails = responses[2];

    // Get the dataset delete details.
    console.log('Dataset delete details:');
    console.log('\tOperation details:');
    console.log(`\t\tName: ${operationDetails.name}`);
    console.log(`\t\tDone: ${operationDetails.done}`);
  })
  .catch(err => {
    console.error(err);
  });

Python

The client library for AutoML Tables includes additional Python methods that simplify using the AutoML Tables API. These methods refer to datasets and models by name instead of id. Your dataset and model names must be unique. For more information, see the Client reference.

If your resources are located in the EU region, you must explicitly set the endpoint. Learn more.

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# dataset_display_name = 'DATASET_DISPLAY_NAME_HERE

from google.cloud import automl_v1beta1 as automl

client = automl.TablesClient(project=project_id, region=compute_region)

# Delete a dataset.
response = client.delete_dataset(dataset_display_name=dataset_display_name)

# synchronous check of operation status.
print(f"Dataset deleted. {response.result()}")

What's next