Managing datasets

A project can have multiple datasets, each used to train a separate model. You can get a list of the available datasets and can delete datasets you no longer need.

For information about creating a dataset and importing data into it, see Creating datasets and importing data.

Listing datasets

A project can include numerous datasets. This section describes how to retrieve a list of the available datasets for a project.

To see a list of the available datasets using the AutoML Natural Language UI, click the Datasets link at the top of the left navigation menu.

To see the datasets for a different project, select the project from the drop-down list in the upper right of the title bar.

REST

Before using any of the request data, make the following replacements:

  • project-id: your project ID
  • location-id: the location for the resource, us-central1 for the Global location or eu for the European Union

HTTP method and URL:

GET https://automl.googleapis.com/v1/projects/project-id/locations/location-id/datasets

To send your request, expand one of these options:

You should receive a JSON response similar to the following:

{
  "datasets": [
    {
      "name": "projects/434039606874/locations/us-central1/datasets/356587829854924648",
      "displayName": "test_dataset",
      "createTime": "2018-04-26T18:02:59.825060Z",
      "textClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    },
    {
      "name": "projects/434039606874/locations/us-central1/datasets/3104518874390609379",
      "displayName": "test",
      "createTime": "2017-12-16T01:10:38.328280Z",
      "textClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    }
  ]
}

Python

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Python API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"

client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = f"projects/{project_id}/locations/us-central1"

# List all the datasets available in the region.
request = automl.ListDatasetsRequest(parent=project_location, filter="")
response = client.list_datasets(request=request)

print("List of datasets:")
for dataset in response:
    print(f"Dataset name: {dataset.name}")
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print(f"Dataset display name: {dataset.display_name}")
    print(f"Dataset create time: {dataset.create_time}")
    print(
        "Text classification dataset metadata: {}".format(
            dataset.text_classification_dataset_metadata
        )
    )

Java

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Java API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.ListDatasetsRequest;
import com.google.cloud.automl.v1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("\nDataset name: %s\n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s\n", retrievedDatasetId);
        System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
        System.out.format(
            "Text classification dataset metadata: %s\n",
            dataset.getTextClassificationDatasetMetadata());
      }
    }
  }
}

Node.js

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Node.js API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function listDatasets() {
  // Construct request
  const request = {
    parent: client.locationPath(projectId, location),
    filter: 'translation_dataset_metadata:*',
  };

  const [response] = await client.listDatasets(request);

  console.log('List of datasets:');
  for (const dataset of response) {
    console.log(`Dataset name: ${dataset.name}`);
    console.log(
      `Dataset id: ${
        dataset.name.split('/')[dataset.name.split('/').length - 1]
      }`
    );
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log('Dataset create time');
    console.log(`\tseconds ${dataset.createTime.seconds}`);
    console.log(`\tnanos ${dataset.createTime.nanos / 1e9}`);
    console.log(
      `Text classification dataset metadata: ${dataset.textClassificationDatasetMetadata}`
    );
  }
}

listDatasets();

Go

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Go API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
	"google.golang.org/api/iterator"
)

// listDatasets lists existing datasets.
func listDatasets(w io.Writer, projectID string, location string) error {
	// projectID := "my-project-id"
	// location := "us-central1"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ListDatasetsRequest{
		Parent: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
	}

	it := client.ListDatasets(ctx, req)

	// Iterate over all results
	for {
		dataset, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("ListGlossaries.Next: %w", err)
		}

		fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
		fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
		fmt.Fprintf(w, "Dataset create time:\n")
		fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
		fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

		// Language text classification
		if metadata := dataset.GetTextClassificationDatasetMetadata(); metadata != nil {
			fmt.Fprintf(w, "Text classification dataset metadata: %v\n", metadata)
		}

	}

	return nil
}

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for Ruby.

Export a dataset

You can export a CSV file with all a dataset's information to a Cloud Storage bucket. The exported CSV file has the same format as the training data import CSV.

To export a dataset:

  1. Select the dataset you want to export documents into from the Datasets page.

  2. Click the Export data option at the top of the Dataset details page.

  3. Navigate to the Cloud Storage bucket where you want to write the export CSV file.

  4. Click Export CSV.

    You will receive an email when the data export process has completed.

Python

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Python API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"
# gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

client = automl.AutoMlClient()

# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)

gcs_destination = automl.GcsDestination(output_uri_prefix=gcs_uri)
output_config = automl.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(name=dataset_full_id, output_config=output_config)
print(f"Dataset exported. {response.result()}")

Java

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Java API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.GcsDestination;
import com.google.cloud.automl.v1.OutputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class ExportDataset {

  static void exportDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsUri = "gs://BUCKET_ID/path_to_export/";
    exportDataset(projectId, datasetId, gcsUri);
  }

  // Export a dataset to a GCS bucket
  static void exportDataset(String projectId, String datasetId, String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(gcsUri).build();

      // Export the dataset to the output URI.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      System.out.println("Processing export...");
      Empty response = client.exportDataAsync(datasetFullId, outputConfig).get();
      System.out.format("Dataset exported. %s\n", response);
    }
  }
}

Node.js

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Node.js API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';
// const gcsUri = 'gs://BUCKET_ID/path_to_export/';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function exportDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: gcsUri,
      },
    },
  };

  const [operation] = await client.exportData(request);
  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset exported: ${response}`);
}

exportDataset();

Go

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Go API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// exportDataset exports a dataset.
func exportDataset(w io.Writer, projectID string, location string, datasetID string, outputURI string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."
	// outputURI := "gs://BUCKET_ID/path_to_export/"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ExportDataRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
		OutputConfig: &automlpb.OutputConfig{
			Destination: &automlpb.OutputConfig_GcsDestination{
				GcsDestination: &automlpb.GcsDestination{
					OutputUriPrefix: outputURI,
				},
			},
		},
	}

	op, err := client.ExportData(ctx, req)
	if err != nil {
		return fmt.Errorf("ExportData: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset exported.\n")

	return nil
}

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for Ruby.

Deleting a dataset

To delete a dataset in the AutoML Natural Language UI:

  1. Click the three-dot menu at the far right of the dataset you want to delete and select Delete dataset.

  2. Click Delete in the confirmation dialog box.

REST

Before using any of the request data, make the following replacements:

  • project-id: your project ID
  • location-id: the location for the resource, us-central1 for the Global location or eu for the European Union
  • dataset-id: your dataset ID

HTTP method and URL:

DELETE https://automl.googleapis.com/v1/projects/project-id/locations/location-id/datasets/dataset-id

To send your request, expand one of these options:

You should receive a JSON response similar to the following:

{
  "name": "projects/434039606874/locations/us-central1/operations/4422270194425422927",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1beta1.OperationMetadata",
    "progressPercentage": 100,
    "createTime": "2018-04-27T02:33:02.479200Z",
    "updateTime": "2018-04-27T02:35:17.309060Z"
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

Python

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Python API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)
response = client.delete_dataset(name=dataset_full_id)

print(f"Dataset deleted. {response.result()}")

Java

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Java API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s\n", response);
    }
  }
}

Node.js

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Node.js API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function deleteDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [operation] = await client.deleteDataset(request);

  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset deleted: ${response}`);
}

deleteDataset();

Go

To learn how to install and use the client library for AutoML Natural Language, see AutoML Natural Language client libraries. For more information, see the AutoML Natural Language Go API reference documentation.

To authenticate to AutoML Natural Language, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// deleteDataset deletes a dataset.
func deleteDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.DeleteDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	op, err := client.DeleteDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset deleted.\n")

	return nil
}

Additional languages

C#: Please follow the C# setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for .NET.

PHP: Please follow the PHP setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for PHP.

Ruby: Please follow the Ruby setup instructions on the client libraries page and then visit the AutoML Natural Language reference documentation for Ruby.