Managing datasets

A project can have multiple datasets, each used to train a separate model. You can get a list of the available datasets, get a specific dataset, export a dataset and can delete a dataset you no longer need.

Listing datasets

A project can include numerous datasets. This section describes how to retrieve a list of the available datasets for a project.

Web UI

To see a list of the available datasets using the AutoML Vision Object Detection UI, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the drop-down list on the left side of the title bar.

REST

Before using any of the request data, make the following replacements:

  • project-id: your GCP project ID.

HTTP method and URL:

GET https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets

To send your request, choose one of these options:

curl

Execute the following command:

curl -X GET \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
"https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets"

PowerShell

Execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
  "datasets": [
    {
      "name": "projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID",
      "displayName": "DISPLAY_NAME",
      "createTime": "2018-10-29T15:45:53.353442Z",
      "exampleCount": 227,
      "imageObjectDetectionDatasetMetadata": {}
    }
  ]
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
	"google.golang.org/api/iterator"
)

// listDatasets lists existing datasets.
func listDatasets(w io.Writer, projectID string, location string) error {
	// projectID := "my-project-id"
	// location := "us-central1"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ListDatasetsRequest{
		Parent: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
	}

	it := client.ListDatasets(ctx, req)

	// Iterate over all results
	for {
		dataset, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("ListGlossaries.Next: %w", err)
		}

		fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
		fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
		fmt.Fprintf(w, "Dataset create time:\n")
		fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
		fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

		// Vision object detection
		if metadata := dataset.GetImageObjectDetectionDatasetMetadata(); metadata != nil {
			fmt.Fprintf(w, "Image object detection dataset metadata: %v\n", metadata)
		}

	}

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.ListDatasetsRequest;
import com.google.cloud.automl.v1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("\nDataset name: %s\n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s\n", retrievedDatasetId);
        System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
        System.out.format(
            "Image object detection dataset metadata: %s\n",
            dataset.getImageObjectDetectionDatasetMetadata());
      }
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function listDatasets() {
  // Construct request
  const request = {
    parent: client.locationPath(projectId, location),
    filter: 'translation_dataset_metadata:*',
  };

  const [response] = await client.listDatasets(request);

  console.log('List of datasets:');
  for (const dataset of response) {
    console.log(`Dataset name: ${dataset.name}`);
    console.log(
      `Dataset id: ${
        dataset.name.split('/')[dataset.name.split('/').length - 1]
      }`
    );
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log('Dataset create time');
    console.log(`\tseconds ${dataset.createTime.seconds}`);
    console.log(`\tnanos ${dataset.createTime.nanos / 1e9}`);
    console.log(
      `Image object detection dataset metatdata: ${dataset.imageObjectDetectionDatasetMetatdata}`
    );
  }
}

listDatasets();

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"

client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = f"projects/{project_id}/locations/us-central1"

# List all the datasets available in the region.
request = automl.ListDatasetsRequest(parent=project_location, filter="")
response = client.list_datasets(request=request)

print("List of datasets:")
for dataset in response:
    print(f"Dataset name: {dataset.name}")
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print(f"Dataset display name: {dataset.display_name}")
    print(f"Dataset create time: {dataset.create_time}")
    print(
        "Image object detection dataset metadata: {}".format(
            dataset.image_object_detection_dataset_metadata
        )
    )

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for Ruby.

Get a dataset

You can also get a specific dataset using a dataset ID.

Web UI

To see a list of the available datasets using the AutoML Vision Object Detection UI, click the Datasets link at the top of the left navigation menu.

Dataset list page

To see the datasets for a different project, select the project from the drop-down list on the left side of the title bar.

Access a specific dataset by selecting its name from the list.

REST

Before using any of the request data, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379

HTTP method and URL:

GET https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID

To send your request, choose one of these options:

curl

Execute the following command:

curl -X GET \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
"https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID"

PowerShell

Execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID" | Select-Object -Expand Content

You should receive a JSON response similar to the following:

{
  "name": "projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID",
  "displayName": "DISPLAY_NAME",
  "createTime": "2019-03-31T22:29:41.136184Z",
  "etag": "AB3BwFo-bssF99O7d4iI4_kwfnSi5pIK8FQ4D8h6Z_EaC4thAeZFbgbaIDvqXWuzjx9s",
  "exampleCount": 225,
  "imageObjectDetectionDatasetMetadata": {}
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// getDataset gets a dataset.
func getDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.GetDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	dataset, err := client.GetDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %w", err)
	}

	fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
	fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
	fmt.Fprintf(w, "Dataset create time:\n")
	fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
	fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

	// Vision object detection
	if metadata := dataset.GetImageObjectDetectionDatasetMetadata(); metadata != nil {
		fmt.Fprintf(w, "Image object detection dataset metadata: %v\n", metadata)
	}

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.DatasetName;
import java.io.IOException;

class GetDataset {

  static void getDataset() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    getDataset(projectId, datasetId);
  }

  // Get a dataset
  static void getDataset(String projectId, String datasetId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Dataset dataset = client.getDataset(datasetFullId);

      // Display the dataset information
      System.out.format("Dataset name: %s\n", dataset.getName());
      // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
      // required for other methods.
      // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
      String[] names = dataset.getName().split("/");
      String retrievedDatasetId = names[names.length - 1];
      System.out.format("Dataset id: %s\n", retrievedDatasetId);
      System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
      System.out.println("Dataset create time:");
      System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
      System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
      System.out.format(
          "Image object detection dataset metadata: %s\n",
          dataset.getImageObjectDetectionDatasetMetadata());
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function getDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [response] = await client.getDataset(request);

  console.log(`Dataset name: ${response.name}`);
  console.log(
    `Dataset id: ${
      response.name.split('/')[response.name.split('/').length - 1]
    }`
  );
  console.log(`Dataset display name: ${response.displayName}`);
  console.log('Dataset create time');
  console.log(`\tseconds ${response.createTime.seconds}`);
  console.log(`\tnanos ${response.createTime.nanos / 1e9}`);
  console.log(
    `Image object detection dataset metatdata: ${response.imageObjectDetectionDatasetMetatdata}`
  );
}

getDataset();

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)
dataset = client.get_dataset(name=dataset_full_id)

# Display the dataset information
print(f"Dataset name: {dataset.name}")
print("Dataset id: {}".format(dataset.name.split("/")[-1]))
print(f"Dataset display name: {dataset.display_name}")
print(f"Dataset create time: {dataset.create_time}")
print(
    "Image object detection dataset metadata: {}".format(
        dataset.image_object_detection_dataset_metadata
    )
)

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for Ruby.

Export a dataset

You can export a CSV file with all a dataset's information to a Google Cloud Storage bucket. This is particularly helpful for the cases where you added, deleted, or modified training image annotations in the UI.

Web UI

To export a non-empty dataset, complete the following steps:

  1. Select the non-empty dataset from the Datasets page.

    Listing dataset image

    Selecting the non-empty dataset will take you to the Dataset details page.

    Label training images UI

  2. Select the Export data option at the top of the Dataset details page.

    export dataset button

    This opens a window where you can choose a Google Cloud Storage bucket location, or create a new bucket and designate it as the location to store the CSV file.

    label stats popout window

  3. Select Export CSV after you have selected a new or existing Google Cloud Storage bucket location.

    export CSV button

  4. You will receive an email when the data export process has completed.

    export complete email

REST

Before using any of the request data, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379
  • output-storage-bucket: a Google Cloud Storage bucket/directory to save output files to, expressed in the following form: gs://bucket/directory/. The requesting user must have write permission to the bucket.

HTTP method and URL:

POST https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID:exportData

Request JSON body:

{
  "outputConfig": {
    "gcsDestination": {
      "outputUriPrefix": "CLOUD_STORAGE_BUCKET"
    }
  }
}

To send your request, choose one of these options:

curl

Save the request body in a file named request.json, and execute the following command:

curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID:exportData"

PowerShell

Save the request body in a file named request.json, and execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID:exportData" | Select-Object -Expand Content

You should see output similar to the following. You can use the operation ID to get the status of the task. For an example, see Working with long-running operations.

{
  "name": "projects/PROJECT_ID/locations/us-central1/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-08-12T18:52:31.637075Z",
    "updateTime": "2019-08-12T18:52:31.637075Z",
    "exportDataDetails": {
      "outputInfo": {
        "gcsOutputDirectory": "CLOUD_STORAGE_BUCKET/export_data-DATASET_NAME-TIMESTAMP_OF_EXPORT_CALL/"
      }
    }
  }
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// exportDataset exports a dataset.
func exportDataset(w io.Writer, projectID string, location string, datasetID string, outputURI string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."
	// outputURI := "gs://BUCKET_ID/path_to_export/"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ExportDataRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
		OutputConfig: &automlpb.OutputConfig{
			Destination: &automlpb.OutputConfig_GcsDestination{
				GcsDestination: &automlpb.GcsDestination{
					OutputUriPrefix: outputURI,
				},
			},
		},
	}

	op, err := client.ExportData(ctx, req)
	if err != nil {
		return fmt.Errorf("ExportData: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset exported.\n")

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.GcsDestination;
import com.google.cloud.automl.v1.OutputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class ExportDataset {

  static void exportDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsUri = "gs://BUCKET_ID/path_to_export/";
    exportDataset(projectId, datasetId, gcsUri);
  }

  // Export a dataset to a GCS bucket
  static void exportDataset(String projectId, String datasetId, String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(gcsUri).build();

      // Export the dataset to the output URI.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      System.out.println("Processing export...");
      Empty response = client.exportDataAsync(datasetFullId, outputConfig).get();
      System.out.format("Dataset exported. %s\n", response);
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';
// const gcsUri = 'gs://BUCKET_ID/path_to_export/';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function exportDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: gcsUri,
      },
    },
  };

  const [operation] = await client.exportData(request);
  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset exported: ${response}`);
}

exportDataset();

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"
# gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

client = automl.AutoMlClient()

# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)

gcs_destination = automl.GcsDestination(output_uri_prefix=gcs_uri)
output_config = automl.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(name=dataset_full_id, output_config=output_config)
print(f"Dataset exported. {response.result()}")

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for Ruby.

Exported CSV format

The exported CSV file contains the same formatting as the training data import CSV:

set,path,label,x_min,y_min,x_max,y_min,x_max,y_max,x_min,y_max

This CSV file is saved in a created export folder that is distinguished by a unique timestamp. Below are some sample lines from an exported CSV file:

/export_data-salad_dataset-2019-05-29T18:12:18.750Z/image_object_detection_1.csv

TRAIN,gs://my-storage-bucket/img/img009.jpg,Cheese,0.643239,0.362779,0.662498,0.362779,0.662498,0.416544,0.643239,0.416544
TRAIN,gs://my-storage-bucket/img/img009.jpg,Salad,0.205697,0.255249,0.459074,0.255249,0.459074,0.775244,0.205697,0.775244
TEST,gs://my-storage-bucket/img/img118.jpg,Cheese,0.320334,0.501238,0.726751,0.501238,0.726751,0.741431,0.320334,0.741431
TEST,gs://my-storage-bucket/img/img118.jpg,Salad,0.0,0.037361,1.0,0.037361,1.0,0.926321,0.0,0.926321
TEST,gs://my-storage-bucket/img/img118.jpg,Cheese,0.358745,0.29076,0.740381,0.29076,0.740381,0.497936,0.358745,0.497936
TRAIN,gs://my-storage-bucket/img/img375.jpg,Tomato,0.027274,0.41247,0.43122,0.41247,0.43122,0.702593,0.027274,0.702593
VALIDATION,gs://my-storage-bucket/img/img852.jpg,Tomato,0.716958,0.178534,0.805999,0.178534,0.805999,0.329861,0.716958,0.329861
VALIDATION,gs://my-storage-bucket/img/img852.jpg,Tomato,0.858044,0.297255,0.950847,0.297255,0.950847,0.39173,0.858044,0.39173
VALIDATION,gs://my-storage-bucket/img/img852.jpg,Tomato,0.199644,0.624155,0.321919,0.624155,0.321919,0.796384,0.199644,0.796384
VALIDATION,gs://my-storage-bucket/img/img852.jpg,Cheese,0.399672,0.277189,0.600955,0.277189,0.600955,0.47032,0.399672,0.47032

Note that there is still only one representation of a bounding box and its label per line. This information tells you the following:

  • img009.jpg - is in the TRAIN set and contains two bounding boxes with Cheese and Salad labels
  • img118.jpg - is in the TEST set and has three bounding boxes with Cheese, Salad, and Cheese labels
  • img375.jpg - is in the TRAIN set and has one bounding box with a Tomato label
  • img852.jpg is in the VALIDATION set and has 4 distinct bounding boxes with Tomato, Tomato, Tomato, and Cheese labels

Deleting a dataset

You can delete a dataset resource using the dataset's ID.

Web UI

  1. In the AutoML Vision Object Detection UI, click the Datasets link at the top of the left navigation menu to display the list of available datasets.

    Deleting a dataset UI

  2. Click the three-dot menu at the far right of the row you want to delete and select Delete dataset.

  3. Click Delete in the confirmation dialog box.

REST

Before using any of the request data, make the following replacements:

  • project-id: your GCP project ID.
  • dataset-id: the ID of your dataset. The ID is the last element of the name of your dataset. For example:
    • dataset name: projects/project-id/locations/location-id/datasets/3104518874390609379
    • dataset id: 3104518874390609379

HTTP method and URL:

DELETE https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID

To send your request, choose one of these options:

curl

Execute the following command:

curl -X DELETE \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "x-goog-user-project: project-id" \
"https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID"

PowerShell

Execute the following command:

$cred = gcloud auth print-access-token
$headers = @{ "Authorization" = "Bearer $cred"; "x-goog-user-project" = "project-id" }

Invoke-WebRequest `
-Method DELETE `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/datasets/DATASET_ID" | Select-Object -Expand Content

You should see output similar to the following. You can use the operation ID to get the status of the task. For an example, see Working with long-running operations.

{
  "name": "projects/PROJECT_ID/locations/us-central1/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-11-08T22:37:19.822128Z",
    "updateTime": "2019-11-08T22:37:19.822128Z",
    "deleteDetails": {}
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

Go

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// deleteDataset deletes a dataset.
func deleteDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.DeleteDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	op, err := client.DeleteDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset deleted.\n")

	return nil
}

Java

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s\n", response);
    }
  }
}

Node.js

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function deleteDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [operation] = await client.deleteDataset(request);

  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset deleted: ${response}`);
}

deleteDataset();

Python

Before trying this sample, follow the setup instructions for this language on the Client Libraries page.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)
response = client.delete_dataset(name=dataset_full_id)

print(f"Dataset deleted. {response.result()}")

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Vision Object Detection reference documentation for Ruby.