Managing datasets

A project can have multiple datasets, each used to train a separate model. You can get a list of the available datasets, get a specific dataset, export a dataset and can delete a dataset you no longer need.

Listing datasets

This section describes how to retrieve a list of the available datasets for a project.

Web UI

To see a list of the available datasets using the Vision Dashboard, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the list in the drop-down located in the upper right of the title bar.

REST & CMD LINE

Before using any of the request data below, make the following replacements:

  • project-id: your GCP project ID.

HTTP method and URL:

GET https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets

To send your request, choose one of these options:

curl

Execute the following command:

curl -X GET \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets

PowerShell

Execute the following command:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
  "datasets": [
    {
      "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
      "displayName": "my_new_dataset",
      "createTime": "2019-10-29T17:31:12.010290Z",
      "etag": "AB3BwFpNUaguCwKeQWtUKLBPQhZr7omCCUBz77pACPIINFpyFe7vbGhp9oZLEEGhIeM=",
      "exampleCount": 3667,
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    },
    {
      "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
      "displayName": "new_dataset",
      "createTime": "2019-10-02T00:44:57.821275Z",
      "etag": "AB3BwFpU_ueMZtTD_8dt-9r8BWqunqMC76YbAbmQYQsQEbtQTxs6U3rPpgAMDCXhYPGq",
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    }
  ]
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"google.golang.org/api/iterator"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// listDatasets lists existing datasets.
func listDatasets(w io.Writer, projectID string, location string) error {
	// projectID := "my-project-id"
	// location := "us-central1"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.ListDatasetsRequest{
		Parent: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
	}

	it := client.ListDatasets(ctx, req)

	// Iterate over all results
	for {
		dataset, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("ListGlossaries.Next: %v", err)
		}

		fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
		fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
		fmt.Fprintf(w, "Dataset create time:\n")
		fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
		fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

		// Vision classification
		if metadata := dataset.GetImageClassificationDatasetMetadata(); metadata != nil {
			fmt.Fprintf(w, "Image classification dataset metadata: %v\n", metadata)
		}

	}

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.ListDatasetsRequest;
import com.google.cloud.automl.v1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("\nDataset name: %s\n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s\n", retrievedDatasetId);
        System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
        System.out.format(
            "Image classification dataset metadata: %s\n",
            dataset.getImageClassificationDatasetMetadata());
      }
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function listDatasets() {
  // Construct request
  const request = {
    parent: client.locationPath(projectId, location),
    filter: 'translation_dataset_metadata:*',
  };

  const [response] = await client.listDatasets(request);

  console.log('List of datasets:');
  for (const dataset of response) {
    console.log(`Dataset name: ${dataset.name}`);
    console.log(
      `Dataset id: ${
        dataset.name.split('/')[dataset.name.split('/').length - 1]
      }`
    );
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log('Dataset create time');
    console.log(`\tseconds ${dataset.createTime.seconds}`);
    console.log(`\tnanos ${dataset.createTime.nanos / 1e9}`);
    console.log(
      `Image classification dataset metadata: ${dataset.imageClassificationDatasetMetadata}`
    );
  }
}

listDatasets();

PHP

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';

$client = new AutoMlClient();

try {
    // resource that represents Google Cloud Platform location
    $formattedParent = $client->locationName(
        $projectId,
        $location
    );

    // list all datasets available in the region
    $filter = '';
    $pagedResponse = $client->listDatasets($formattedParent, $filter);

    print('List of datasets' . PHP_EOL);
    foreach ($pagedResponse->iteratePages() as $page) {
        foreach ($page as $dataset) {
            // display dataset information
            $splitName = explode('/', $dataset->getName());
            printf('Dataset name: %s' . PHP_EOL, $dataset->getName());
            printf('Dataset id: %s' . PHP_EOL, end($splitName));
            printf('Dataset display name: %s' . PHP_EOL, $dataset->getDisplayName());
            printf('Dataset create time' . PHP_EOL);
            printf('seconds: %d' . PHP_EOL, $dataset->getCreateTime()->getSeconds());
            printf('nanos : %d' . PHP_EOL, $dataset->getCreateTime()->getNanos());
            printf('Image classification dataset metadata: %s' . PHP_EOL, $dataset->getImageClassificationDatasetMetadata());
        }
    }
} finally {
    $client->close();
}

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"

client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = f"projects/{project_id}/locations/us-central1"

# List all the datasets available in the region.
request = automl.ListDatasetsRequest(parent=project_location, filter="")
response = client.list_datasets(request=request)

print("List of datasets:")
for dataset in response:
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Dataset create time: {}".format(dataset.create_time))
    print(
        "Image classification dataset metadata: {}".format(
            dataset.image_classification_dataset_metadata
        )
    )

Ruby

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"

client = Google::Cloud::AutoML.auto_ml

# A resource that represents Google Cloud Platform location.
project_location = client.location_path project: project_id,
                                        location: "us-central1"

# List all the datasets available in the region.
datasets = client.list_datasets parent: project_location

puts "List of datasets:"

datasets.each do |dataset|
  puts "Dataset name: #{dataset.name}"
  puts "Dataset id: #{dataset.name.split('/').last}"
  puts "Dataset display name: #{dataset.display_name}"
  puts "Dataset create time: #{dataset.create_time.to_time}"
  puts "Image classification dataset metadata: #{dataset.image_classification_dataset_metadata}"
end

Get a dataset

You can also get a specific dataset using a dataset ID.

Web UI

To see a list of the available datasets using the AutoML Vision UI, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the drop-down list on the left side of the title bar.

Access a specific dataset by selecting its name from the list.

REST & CMD LINE

Before using any of the request data below, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379

HTTP method and URL:

GET https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

To send your request, choose one of these options:

curl

Execute the following command:

curl -X GET \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

PowerShell

Execute the following command:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
  "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
  "displayName": "display-name",
  "createTime": "2019-10-29T17:31:12.010290Z",
  "etag": "AB3BwFoP09ffuRNnaWMx4UGi8uvYFctvOBjns84OercuMRIdXr0YINNiUqeW85SB3g4=",
  "exampleCount": 3667,
  "imageClassificationDatasetMetadata": {
    "classificationType": "MULTICLASS"
  }
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// getDataset gets a dataset.
func getDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.GetDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	dataset, err := client.GetDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %v", err)
	}

	fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
	fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
	fmt.Fprintf(w, "Dataset create time:\n")
	fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
	fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

	// Vision classification
	if metadata := dataset.GetImageClassificationDatasetMetadata(); metadata != nil {
		fmt.Fprintf(w, "Image classification dataset metadata: %v\n", metadata)
	}

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.TranslationDatasetMetadata;
import com.google.protobuf.Timestamp;
import java.io.IOException;

class GetDataset {

  static void getDataset() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    getDataset(projectId, datasetId);
  }

  // Get a dataset
  static void getDataset(String projectId, String datasetId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Dataset dataset = client.getDataset(datasetFullId);

      // Display the dataset information
      System.out.format("Dataset name: %s%n", dataset.getName());
      // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
      // required for other methods.
      // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
      String[] names = dataset.getName().split("/");
      String retrievedDatasetId = names[names.length - 1];
      System.out.format("Dataset id: %s%n", retrievedDatasetId);
      System.out.format("Dataset display name: %s%n", dataset.getDisplayName());
      System.out.println("Dataset create time:");
      Timestamp createdTime = dataset.getCreateTime();
      System.out.format("\tseconds: %s%n", createdTime.getSeconds());
      System.out.format("\tnanos: %s%n", createdTime.getNanos());
      System.out.format(
          "Image classification dataset metadata: %s%n",
          dataset.getImageClassificationDatasetMetadata());
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function getDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [response] = await client.getDataset(request);

  console.log(`Dataset name: ${response.name}`);
  console.log(
    `Dataset id: ${
      response.name.split('/')[response.name.split('/').length - 1]
    }`
  );
  console.log(`Dataset display name: ${response.displayName}`);
  console.log('Dataset create time');
  console.log(`\tseconds ${response.createTime.seconds}`);
  console.log(`\tnanos ${response.createTime.nanos / 1e9}`);
  console.log(
    `Image classification dataset metadata: ${response.imageClassificationDatasetMetadata}`
  );
}

getDataset();

PHP

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    $dataset = $client->getDataset($formattedName);

    // display dataset information
    $splitName = explode('/', $dataset->getName());
    printf('Dataset name: %s' . PHP_EOL, $dataset->getName());
    printf('Dataset id: %s' . PHP_EOL, end($splitName));
    printf('Dataset display name: %s' . PHP_EOL, $dataset->getDisplayName());
    printf('Dataset create time' . PHP_EOL);
    printf('seconds: %d' . PHP_EOL, $dataset->getCreateTime()->getSeconds());
    printf('nanos : %d' . PHP_EOL, $dataset->getCreateTime()->getNanos());
    printf('Image classification dataset metadata: %s' . PHP_EOL, $dataset->getImageClassificationDatasetMetadata());
} finally {
    $client->close();
}

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)
dataset = client.get_dataset(name=dataset_full_id)

# Display the dataset information
print("Dataset name: {}".format(dataset.name))
print("Dataset id: {}".format(dataset.name.split("/")[-1]))
print("Dataset display name: {}".format(dataset.display_name))
print("Dataset create time: {}".format(dataset.create_time))
print(
    "Image classification dataset metadata: {}".format(
        dataset.image_classification_dataset_metadata
    )
)

Ruby

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"

client = Google::Cloud::AutoML.auto_ml
# Get the full path of the dataset
dataset_full_id = client.dataset_path project: project_id,
                                      location: "us-central1",
                                      dataset: dataset_id
dataset = client.get_dataset name: dataset_full_id

# Display the dataset information
puts "Dataset name: #{dataset.name}"
puts "Dataset id: #{dataset.name.split('/').last}"
puts "Dataset display name: #{dataset.display_name}"
puts "Dataset create time: #{dataset.create_time.to_time}"
puts "Image classification dataset metadata: #{dataset.image_classification_dataset_metadata}"

Export a dataset

You can export a CSV file with all a dataset's information to a Google Cloud Storage bucket.

Web UI

To export a non-empty dataset, complete the following steps:

  1. Select the non-empty dataset from the Datasets page.

    Dataset list page

    Selecting the non-empty dataset will take you to the Dataset details page.

    Label training images UI

  2. Select the Export data option at the top of the Dataset details page.

    export dataset button

    This opens a window where you can choose a Google Cloud Storage bucket location, or create a new bucket and designate it as the location to store the CSV file.

    label stats popout window

  3. Select Export CSV after you have selected a new or existing Google Cloud Storage bucket location.

    export CSV button

  4. You will receive an email when the data export process has completed.

REST & CMD LINE

Before using any of the request data below, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379
  • output-storage-bucket: a Google Cloud Storage bucket/directory to save output files to, expressed in the following form: gs://bucket/directory/. The requesting user must have write permission to the bucket.

HTTP method and URL:

POST https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData

Request JSON body:

{
  "outputConfig": {
    "gcsDestination": {
      "outputUriPrefix": "cloud-storage-bucket"
    }
  }
}

To send your request, choose one of these options:

curl

Save the request body in a file called request.json, and execute the following command:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData

PowerShell

Save the request body in a file called request.json, and execute the following command:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData" | Select-Object -Expand Content

You should see output similar to the following. You can use the operation ID to get the status of the task. For an example, see Working with long-running operations

{
  "name": "projects/project-id/locations/us-central1/operations/operation-id",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-08-12T18:52:31.637075Z",
    "updateTime": "2019-08-12T18:52:31.637075Z",
    "exportDataDetails": {
      "outputInfo": {
        "gcsOutputDirectory": "cloud-storage-bucket/export_data-dataset-name-timestamp-of-export-call/"
      }
    }
  }
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.GcsDestination;
import com.google.cloud.automl.v1.OutputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class ExportDataset {

  static void exportDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsUri = "gs://BUCKET_ID/path_to_export/";
    exportDataset(projectId, datasetId, gcsUri);
  }

  // Export a dataset to a GCS bucket
  static void exportDataset(String projectId, String datasetId, String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(gcsUri).build();

      // Export the dataset to the output URI.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      System.out.println("Processing export...");
      Empty response = client.exportDataAsync(datasetFullId, outputConfig).get();
      System.out.format("Dataset exported. %s\n", response);
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';
// const gcsUri = 'gs://BUCKET_ID/path_to_export/';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function exportDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: gcsUri,
      },
    },
  };

  const [operation] = await client.exportData(request);
  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset exported: ${response}`);
}

exportDataset();

PHP

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

use Google\Cloud\AutoMl\V1\AutoMlClient;
use Google\Cloud\AutoMl\V1\GcsDestination;
use Google\Cloud\AutoMl\V1\OutputConfig;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';
// $gcsUri = 'gs://BUCKET_ID/path_to_export/';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    // set GCS uri
    $gcsDestination = (new GcsDestination())
        ->setInputUri($gcsUri);
    $outputConfig = (new OutputConfig())
        ->setGcsDestination($gcsDestination);

    $operationResponse = $client->exportData($formattedName, $outputConfig);
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $result = $operationResponse->getResult();
        printf('Dataset exported.' . PHP_EOL);
    } else {
        $error = $operationResponse->getError();
        // handleError($error)
    }
} finally {
    $client->close();
}

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"
# gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

client = automl.AutoMlClient()

# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)

gcs_destination = automl.GcsDestination(output_uri_prefix=gcs_uri)
output_config = automl.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(name=dataset_full_id, output_config=output_config)
print(f"Dataset exported. {response.result()}")

Ruby

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"
gcs_uri = "gs://BUCKET_ID/path_to_export/"

client = Google::Cloud::AutoML.auto_ml

# Get the full path of the dataset
dataset_full_id = client.dataset_path project: project_id,
                                      location: "us-central1",
                                      dataset: dataset_id
output_config = {
  gcs_destination: {
    output_uri_prefix: gcs_uri
  }
}

operation = client.export_data name: dataset_full_id,
                               output_config: output_config

# Wait until the long running operation is done
operation.wait_until_done!

puts "Dataset exported."

Exported CSV format

The exported CSV file contains the same formatting as the training data import CSV:

set,path,label0[,label1,label2,...]

This CSV file is saved in a created export folder that is distinguished by a unique timestamp. Below are some sample lines from an exported CSV file:

my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/image_classification_1.csv :

TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img874.jpg,dandelion
VALIDATION,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img447.jpg,roses
TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img672.jpg,dandelion
VALIDATION,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img421.jpg,sunflowers
TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img495.jpg,tulips
TEST,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img014.jpg,sunflowers

Deleting a dataset

You can delete a dataset you no longer need in the UI, or with the dataset's ID via the following code samples.

Web UI

  1. In the Vision Dashboard, click the Datasets link at the top of the left navigation menu to display the list of available datasets.

  2. Click the three-dot menu at the far right of the row you want to delete and select Delete dataset.

  3. Click Delete in the confirmation dialog box.

    Dataset list page

REST & CMD LINE

Before using any of the request data below, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379

HTTP method and URL:

DELETE https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

To send your request, choose one of these options:

curl

Execute the following command:

curl -X DELETE \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

PowerShell

Execute the following command:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method DELETE `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id" | Select-Object -Expand Content

You should see output similar to the following. You can use the operation ID to get the status of the task. For an example, see Working with long-running operations

{
  "name": "projects/project-id/locations/us-central1/operations/operation-id",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-11-08T22:37:19.822128Z",
    "updateTime": "2019-11-08T22:37:19.822128Z",
    "deleteDetails": {}
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// deleteDataset deletes a dataset.
func deleteDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.DeleteDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	op, err := client.DeleteDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %v", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %v", err)
	}

	fmt.Fprintf(w, "Dataset deleted.\n")

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s\n", response);
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function deleteDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [operation] = await client.deleteDataset(request);

  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset deleted: ${response}`);
}

deleteDataset();

PHP

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    $operationResponse = $client->deleteDataset($formattedName);
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $result = $operationResponse->getResult();
        printf('Dataset deleted.' . PHP_EOL);
    } else {
        $error = $operationResponse->getError();
        // handleError($error)
    }
} finally {
    $client->close();
}

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)
response = client.delete_dataset(name=dataset_full_id)

print("Dataset deleted. {}".format(response.result()))

Ruby

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"

client = Google::Cloud::AutoML.auto_ml

# Get the full path of the dataset
dataset_full_id = client.dataset_path project: project_id,
                                      location: "us-central1",
                                      dataset: dataset_id

operation = client.delete_dataset name: dataset_full_id

# Wait until the long running operation is done
operation.wait_until_done!

puts "Dataset deleted."