管理数据集

项目可以有多个数据集,每个数据集用于训练单独的模型。您可以获取可用数据集列表,也可以删除不再需要的数据集

如需了解如何创建数据集并将数据导入其中,请参阅创建数据集并导入数据

列出数据集

一个项目可以包含许多数据集。本部分介绍如何检索项目的可用数据集列表。

如需使用 AutoML Natural Language 界面查看可用数据集的列表,请点击左侧导航菜单顶部的数据集链接。

要查看其他项目的数据集,请从标题栏右上角的下拉列表中选择该项目。

REST

在使用任何请求数据之前,请先进行以下替换:

  • project-id:您的项目 ID
  • location-id:资源的位置,全球位置为 us-central1,欧盟位置为 eu

HTTP 方法和网址:

GET https://automl.googleapis.com/v1/projects/project-id/locations/location-id/datasets

如需发送您的请求,请展开以下选项之一:

您应该收到类似以下内容的 JSON 响应:

{
  "datasets": [
    {
      "name": "projects/434039606874/locations/us-central1/datasets/356587829854924648",
      "displayName": "test_dataset",
      "createTime": "2018-04-26T18:02:59.825060Z",
      "textClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    },
    {
      "name": "projects/434039606874/locations/us-central1/datasets/3104518874390609379",
      "displayName": "test",
      "createTime": "2017-12-16T01:10:38.328280Z",
      "textClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    }
  ]
}

Python

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Python API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"

client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = f"projects/{project_id}/locations/us-central1"

# List all the datasets available in the region.
request = automl.ListDatasetsRequest(parent=project_location, filter="")
response = client.list_datasets(request=request)

print("List of datasets:")
for dataset in response:
    print(f"Dataset name: {dataset.name}")
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print(f"Dataset display name: {dataset.display_name}")
    print(f"Dataset create time: {dataset.create_time}")
    print(
        "Text classification dataset metadata: {}".format(
            dataset.text_classification_dataset_metadata
        )
    )

Java

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Java API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.ListDatasetsRequest;
import com.google.cloud.automl.v1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("\nDataset name: %s\n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s\n", retrievedDatasetId);
        System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
        System.out.format(
            "Text classification dataset metadata: %s\n",
            dataset.getTextClassificationDatasetMetadata());
      }
    }
  }
}

Node.js

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Node.js API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function listDatasets() {
  // Construct request
  const request = {
    parent: client.locationPath(projectId, location),
    filter: 'translation_dataset_metadata:*',
  };

  const [response] = await client.listDatasets(request);

  console.log('List of datasets:');
  for (const dataset of response) {
    console.log(`Dataset name: ${dataset.name}`);
    console.log(
      `Dataset id: ${
        dataset.name.split('/')[dataset.name.split('/').length - 1]
      }`
    );
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log('Dataset create time');
    console.log(`\tseconds ${dataset.createTime.seconds}`);
    console.log(`\tnanos ${dataset.createTime.nanos / 1e9}`);
    console.log(
      `Text classification dataset metadata: ${dataset.textClassificationDatasetMetadata}`
    );
  }
}

listDatasets();

Go

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Go API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
	"google.golang.org/api/iterator"
)

// listDatasets lists existing datasets.
func listDatasets(w io.Writer, projectID string, location string) error {
	// projectID := "my-project-id"
	// location := "us-central1"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ListDatasetsRequest{
		Parent: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
	}

	it := client.ListDatasets(ctx, req)

	// Iterate over all results
	for {
		dataset, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("ListGlossaries.Next: %w", err)
		}

		fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
		fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
		fmt.Fprintf(w, "Dataset create time:\n")
		fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
		fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

		// Language text classification
		if metadata := dataset.GetTextClassificationDatasetMetadata(); metadata != nil {
			fmt.Fprintf(w, "Text classification dataset metadata: %v\n", metadata)
		}

	}

	return nil
}

其他语言

C#: 请按照客户端库页面上的 C# 设置说明操作,然后访问 .NET 版 AutoML Natural Language 参考文档。

PHP: 请按照客户端库页面上的 PHP 设置说明操作,然后访问 PHP 版 AutoML Natural Language 参考文档。

Ruby: 请按照客户端库页面上的 Ruby 设置说明操作,然后访问 Ruby 版 AutoML Natural Language 参考文档。

导出数据集

您可以将包含所有数据集信息的 CSV 文件导出到 Cloud Storage 存储桶。导出的 CSV 文件具有与训练数据导入 CSV 相同的格式。

如需导出数据集,请执行以下操作:

  1. 数据集页面选择要从其中导出文档的数据集。

  2. 点击“数据集详细信息”(Dataset details) 页面顶部的导出数据选项。

  3. 导航到要在其中写入导出 CSV 文件的 Cloud Storage 存储桶。

  4. 点击导出 CSV

    数据导出过程完成后,您会收到电子邮件通知。

Python

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Python API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"
# gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

client = automl.AutoMlClient()

# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)

gcs_destination = automl.GcsDestination(output_uri_prefix=gcs_uri)
output_config = automl.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(name=dataset_full_id, output_config=output_config)
print(f"Dataset exported. {response.result()}")

Java

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Java API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.GcsDestination;
import com.google.cloud.automl.v1.OutputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class ExportDataset {

  static void exportDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsUri = "gs://BUCKET_ID/path_to_export/";
    exportDataset(projectId, datasetId, gcsUri);
  }

  // Export a dataset to a GCS bucket
  static void exportDataset(String projectId, String datasetId, String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(gcsUri).build();

      // Export the dataset to the output URI.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      System.out.println("Processing export...");
      Empty response = client.exportDataAsync(datasetFullId, outputConfig).get();
      System.out.format("Dataset exported. %s\n", response);
    }
  }
}

Node.js

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Node.js API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';
// const gcsUri = 'gs://BUCKET_ID/path_to_export/';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function exportDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: gcsUri,
      },
    },
  };

  const [operation] = await client.exportData(request);
  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset exported: ${response}`);
}

exportDataset();

Go

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Go API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// exportDataset exports a dataset.
func exportDataset(w io.Writer, projectID string, location string, datasetID string, outputURI string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."
	// outputURI := "gs://BUCKET_ID/path_to_export/"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.ExportDataRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
		OutputConfig: &automlpb.OutputConfig{
			Destination: &automlpb.OutputConfig_GcsDestination{
				GcsDestination: &automlpb.GcsDestination{
					OutputUriPrefix: outputURI,
				},
			},
		},
	}

	op, err := client.ExportData(ctx, req)
	if err != nil {
		return fmt.Errorf("ExportData: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset exported.\n")

	return nil
}

其他语言

C#: 请按照客户端库页面上的 C# 设置说明操作,然后访问 .NET 版 AutoML Natural Language 参考文档。

PHP: 请按照客户端库页面上的 PHP 设置说明操作,然后访问 PHP 版 AutoML Natural Language 参考文档。

Ruby: 请按照客户端库页面上的 Ruby 设置说明操作,然后访问 Ruby 版 AutoML Natural Language 参考文档。

删除数据集

如需在 AutoML Natural Language 界面中删除数据集,请执行以下操作:

  1. 点击待删除数据集最右侧的三点状菜单,然后选择删除数据集

  2. 在确认对话框中点击删除

REST

在使用任何请求数据之前,请先进行以下替换:

  • project-id:您的项目 ID
  • location-id:资源的位置,全球位置为 us-central1,欧盟位置为 eu
  • dataset-id:您的数据集 ID

HTTP 方法和网址:

DELETE https://automl.googleapis.com/v1/projects/project-id/locations/location-id/datasets/dataset-id

如需发送您的请求,请展开以下选项之一:

您应该收到类似以下内容的 JSON 响应:

{
  "name": "projects/434039606874/locations/us-central1/operations/4422270194425422927",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1beta1.OperationMetadata",
    "progressPercentage": 100,
    "createTime": "2018-04-27T02:33:02.479200Z",
    "updateTime": "2018-04-27T02:35:17.309060Z"
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

Python

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Python API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(project_id, "us-central1", dataset_id)
response = client.delete_dataset(name=dataset_full_id)

print(f"Dataset deleted. {response.result()}")

Java

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Java API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s\n", response);
    }
  }
}

Node.js

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Node.js API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function deleteDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [operation] = await client.deleteDataset(request);

  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset deleted: ${response}`);
}

deleteDataset();

Go

如需了解如何安装和使用 AutoML Natural Language 的客户端库,请参阅 AutoML Natural Language 客户端库。 如需了解详情,请参阅 AutoML Natural Language Go API 参考文档

如需向 AutoML Natural Language 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"cloud.google.com/go/automl/apiv1/automlpb"
)

// deleteDataset deletes a dataset.
func deleteDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer client.Close()

	req := &automlpb.DeleteDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	op, err := client.DeleteDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %w", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %w", err)
	}

	fmt.Fprintf(w, "Dataset deleted.\n")

	return nil
}

其他语言

C#: 请按照客户端库页面上的 C# 设置说明操作,然后访问 .NET 版 AutoML Natural Language 参考文档。

PHP: 请按照客户端库页面上的 PHP 设置说明操作,然后访问 PHP 版 AutoML Natural Language 参考文档。

Ruby: 请按照客户端库页面上的 Ruby 设置说明操作,然后访问 Ruby 版 AutoML Natural Language 参考文档。