Managing datasets

A project can have multiple datasets, each used to train a separate model. You can get a list of the available datasets and can delete datasets you no longer need.

Listing datasets

A project can include numerous datasets. This section describes how to retrieve a list of the available datasets for a project.

Web UI

To see a list of the available datasets using the AutoML Vision UI, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the drop-down list in the upper right of the title bar.

Integrated UI

To see a list of the available datasets using the Vision Dashboard, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the drop-down list in the upper right of the title bar.

Command-line

# List the datasets of ${PROJECT_ID} in us-central1, authenticating with the
# application-default access token obtained from gcloud.
curl \
  --header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
  --header "Content-Type: application/json" \
  "https://automl.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/us-central1/datasets"

You should see output similar to the following:

{
  "datasets": [
    {
      "name": "projects/434039606874/locations/us-central1/datasets/356587829854924648",
      "displayName": "test_dataset",
      "createTime": "2018-04-26T18:02:59.825060Z",
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    },
    {
      "name": "projects/434039606874/locations/us-central1/datasets/3104518874390609379",
      "displayName": "test",
      "createTime": "2017-12-16T01:10:38.328280Z",
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    }
  ]
}

Python

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# filter_ = 'filter expression here'

from google.cloud import automl_v1beta1 as automl

client = automl.AutoMlClient()

# Parent resource path for the project/region pair whose datasets are listed.
parent = client.location_path(project_id, compute_region)

# Ask for the datasets under that parent, narrowed by the filter expression.
datasets = client.list_datasets(parent, filter_)

print("List of datasets:")
for entry in datasets:
    # The dataset id is the last "/"-separated segment of the resource name.
    full_name = entry.name
    short_id = full_name.split("/")[-1]

    print("Dataset name: {}".format(full_name))
    print("Dataset id: {}".format(short_id))
    print("Dataset display name: {}".format(entry.display_name))
    print("Image classification dataset metadata:")
    print("\t{}".format(entry.image_classification_dataset_metadata))
    print("Dataset example count: {}".format(entry.example_count))

    # The creation timestamp is reported as separate seconds/nanos fields.
    created = entry.create_time
    print("Dataset create time:")
    print("\tseconds: {}".format(created.seconds))
    print("\tnanos: {}".format(created.nanos))

Java

/**
 * Prints the datasets of a project and region that match a filter expression.
 *
 * @param projectId the Id of the project.
 * @param computeRegion the Region name.
 * @param filter the Filter expression.
 */
static void listDatasets(String projectId, String computeRegion, String filter) {
  // try-with-resources closes the client once the listing is finished.
  try (AutoMlClient client = AutoMlClient.create()) {

    // Parent resource: the project/region pair whose datasets are listed.
    String parent = LocationName.of(projectId, computeRegion).toString();

    // Assemble the list request step by step.
    ListDatasetsRequest.Builder requestBuilder = ListDatasetsRequest.newBuilder();
    requestBuilder.setParent(parent);
    requestBuilder.setFilter(filter);
    ListDatasetsRequest request = requestBuilder.build();

    // No newline here: each dataset entry below starts with its own "\n".
    System.out.print("List of datasets:");
    for (Dataset dataset : client.listDatasets(request).iterateAll()) {
      // The dataset id is the last "/"-separated segment of the resource name.
      String fullName = dataset.getName();
      String[] nameParts = fullName.split("/");
      String datasetId = nameParts[nameParts.length - 1];

      System.out.println(String.format("\nDataset name: %s", fullName));
      System.out.println(String.format("Dataset id: %s", datasetId));
      System.out.println(String.format("Dataset display name: %s", dataset.getDisplayName()));
      System.out.println("Image classification dataset specification:");
      // print (not println) is kept exactly as in the original sample.
      System.out.print(String.format("\t%s", dataset.getImageClassificationDatasetMetadata()));
      System.out.println(String.format("Dataset example count: %d", dataset.getExampleCount()));
      System.out.println("Dataset create time:");
      System.out.println(String.format("\tseconds: %s", dataset.getCreateTime().getSeconds()));
      System.out.println(String.format("\tnanos: %s", dataset.getCreateTime().getNanos()));
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}

Node.js

  /**
   * Lists the datasets of a project/region that match a filter and logs the
   * name, id, display name, example count, classification type (when the
   * dataset carries image classification metadata) and creation time of each.
   */
  async function automlVisionListDatasets() {
    const automl = require(`@google-cloud/automl`).v1beta1;

    const client = new automl.AutoMlClient();
    /**
     * TODO(developer): Uncomment the following lines before running the sample.
     */
    // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
    // const computeRegion = `region-name, e.g. "us-central1"`;
    // const filter = `filter expressions, must specify field e.g. "image_classification_dataset_metadata:*"`;

    // A resource that represents Google Cloud Platform location.
    const projectLocation = client.locationPath(projectId, computeRegion);

    // List all the datasets available in the region by applying filter.
    const [datasets] = await client.listDatasets({
      parent: projectLocation,
      filter: filter,
    });
    console.log(`List of datasets:`);
    datasets.forEach(dataset => {
      console.log(`Dataset name: ${dataset.name}`);
      // The dataset id is the last "/"-separated segment of the resource name.
      console.log(`Dataset Id: ${dataset.name.split(`/`).pop()}`);
      console.log(`Dataset display name: ${dataset.displayName}`);
      console.log(`Dataset example count: ${dataset.exampleCount}`);

      // Datasets without image classification metadata would otherwise make
      // the property access below throw and abort the whole listing.
      const classificationMetadata = dataset.imageClassificationDatasetMetadata;
      if (classificationMetadata) {
        console.log(`Image Classification type:`);
        console.log(`\t`, classificationMetadata.classificationType);
      }

      console.log(`Dataset create time: `);
      console.log(`\tseconds: ${dataset.createTime.seconds}`);
      console.log(`\tnanos: ${dataset.createTime.nanos}`);
      console.log(`\n`);
    });
  }

  automlVisionListDatasets().catch(console.error);

Deleting a dataset

Web UI

  1. In the AutoML Vision UI, click the Datasets link at the top of the left navigation menu to display the list of available datasets.

  2. Click the three-dot menu at the far right of the row for the dataset you want to delete, and then select Delete dataset.

  3. Click Delete in the confirmation dialog box.

    Dataset list page

Integrated UI

  1. In the Vision Dashboard, click the Datasets link at the top of the left navigation menu to display the list of available datasets.

  2. Click the three-dot menu at the far right of the row for the dataset you want to delete, and then select Delete dataset.

  3. Click Delete in the confirmation dialog box.

    Dataset list page

Command-line

  • Replace dataset-name with the full name of your dataset, as returned in the "name" field of the response when you created the dataset. The full name has the format: projects/{project-id}/locations/us-central1/datasets/{dataset-id}
# Delete the dataset identified by dataset-name, authenticating with the
# application-default access token obtained from gcloud.
curl --request DELETE \
  --header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
  --header "Content-Type: application/json" \
  "https://automl.googleapis.com/v1beta1/dataset-name"

You should see output similar to the following:

{
  "name": "projects/434039606874/locations/us-central1/operations/3512013641657611176",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1beta1.OperationMetadata",
    "createTime": "2018-05-04T01:45:16.735340Z",
    "updateTime": "2018-05-04T01:45:16.735360Z",
    "cancellable": true
  }
}

Python

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# dataset_id = 'DATASET_ID_HERE'

from google.cloud import automl_v1beta1 as automl

client = automl.AutoMlClient()

# Build the full resource path of the dataset from its project, region and id.
dataset_name = client.dataset_path(project_id, compute_region, dataset_id)

# Request the deletion; the returned object exposes result() for the outcome.
operation = client.delete_dataset(dataset_name)

# result() is the synchronous check of the operation status.
outcome = operation.result()
print("Dataset deleted. {}".format(outcome))

Java

/**
 * Deletes the dataset identified by project, region and dataset id, then prints the result.
 *
 * @param projectId the Id of the project.
 * @param computeRegion the Region name.
 * @param datasetId the Id of the dataset.
 */
static void deleteDataset(String projectId, String computeRegion, String datasetId) {
  // try-with-resources closes the client once the delete call has returned.
  try (AutoMlClient client = AutoMlClient.create()) {

    // Full resource name of the dataset to remove.
    DatasetName datasetName = DatasetName.of(projectId, computeRegion, datasetId);

    // get() waits for the asynchronous delete to finish before continuing.
    Empty result = client.deleteDatasetAsync(datasetName).get();

    String message = String.format("Dataset deleted. %s", result);
    System.out.println(message);
  } catch (IOException | InterruptedException | ExecutionException e) {
    e.printStackTrace();
  }
}

Node.js

  /**
   * Deletes one dataset and logs a confirmation once the delete operation
   * reports that it is done.
   */
  async function automlVisionDeleteDataset() {
    const automl = require(`@google-cloud/automl`).v1beta1;

    const client = new automl.AutoMlClient();

    /**
     * TODO(developer): Uncomment the following lines before running the sample.
     */
    // const projectId = `The GCLOUD_PROJECT string, e.g. "my-gcloud-project"`;
    // const computeRegion = `region-name, e.g. "us-central1"`;
    // const datasetId = `Id of the dataset`;

    // Build the full resource path of the dataset to remove.
    const datasetName = client.datasetPath(projectId, computeRegion, datasetId);

    // Start the delete; the first element of the reply is the operation handle.
    const deleteReply = await client.deleteDataset({name: datasetName});
    const operation = deleteReply[0];

    // Wait for the operation; its third element carries the `done` flag.
    const settled = await operation.promise();
    const finalResponse = settled[2];
    if (!finalResponse.done) {
      return;
    }
    console.log(`Dataset deleted.`);
  }

  automlVisionDeleteDataset().catch(console.error);

Was this page helpful? Let us know how we did:

Send feedback about...

Cloud AutoML Vision
Need help? Visit our support page.