管理数据集

一个项目可以有多个数据集,每个数据集用于训练不同的模型。您可以获取可用数据集列表、获取特定的数据集导出数据集,并且可以删除不再需要的数据集

列出数据集

本部分介绍如何检索项目的可用数据集列表。

网页界面

要使用 Vision Dashboard 查看可用数据集列表,请点击左侧导航菜单顶部的数据集链接。

数据集列表页面

要查看其他项目的数据集,请从标题栏右上角的下拉列表中选择该项目。

REST 和命令行

在使用下面的任何请求数据之前,请先进行以下替换:

  • project-id:您的 GCP 项目 ID。

HTTP 方法和网址:

GET https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets

如需发送请求,请选择以下方式之一:

curl

执行以下命令:

curl -X GET \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets

PowerShell

执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets" | Select-Object -Expand Content

您应该收到类似以下内容的 JSON 响应:



    {
  "datasets": [
    {
      "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
      "displayName": "my_new_dataset",
      "createTime": "2019-10-29T17:31:12.010290Z",
      "etag": "AB3BwFpNUaguCwKeQWtUKLBPQhZr7omCCUBz77pACPIINFpyFe7vbGhp9oZLEEGhIeM=",
      "exampleCount": 3667,
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    },
    {
      "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
      "displayName": "new_dataset",
      "createTime": "2019-10-02T00:44:57.821275Z",
      "etag": "AB3BwFpU_ueMZtTD_8dt-9r8BWqunqMC76YbAbmQYQsQEbtQTxs6U3rPpgAMDCXhYPGq",
      "imageClassificationDatasetMetadata": {
        "classificationType": "MULTICLASS"
      }
    }
  ]
}

C#

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。


/// <summary>
/// Demonstrates using the AutoML client to list all datasets.
/// </summary>
/// <param name="projectId">GCP Project ID.</param>
public static object ListDatasets(string projectId = "YOUR-PROJECT-ID")
{
    // Initialize the client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    AutoMlClient client = AutoMlClient.Create();

    // A resource that represents Google Cloud Platform location.
    string projectLocation = LocationName.Format(projectId, "us-central1");
    ListDatasetsRequest request = new ListDatasetsRequest
    {
        Parent = projectLocation
    };

    // List all the datasets available in the region by applying filter.
    Console.WriteLine("List of datasets:");
    foreach (Dataset dataset in client.ListDatasets(request))
    {
        // Display the dataset information
        Console.WriteLine($"Dataset name: {dataset.Name}");
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        string[] names = dataset.Name.Split("/");
        string retrievedDatasetId = names[names.Length - 1];
        Console.WriteLine($"Dataset id: {retrievedDatasetId}");
        Console.WriteLine($"Dataset display name: {dataset.DisplayName}");
        Console.WriteLine("Dataset create time:");
        Console.WriteLine($"\tseconds: {dataset.CreateTime.Seconds}");
        Console.WriteLine($"\tnanos: {dataset.CreateTime.Nanos}");
        Console.WriteLine(
            $"Image classification dataset metadata: {dataset.ImageClassificationDatasetMetadata}");
    }
    return 0;
}

Go

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	"google.golang.org/api/iterator"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// listDatasets lists existing datasets.
func listDatasets(w io.Writer, projectID string, location string) error {
	// projectID := "my-project-id"
	// location := "us-central1"

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.ListDatasetsRequest{
		Parent: fmt.Sprintf("projects/%s/locations/%s", projectID, location),
	}

	it := client.ListDatasets(ctx, req)

	// Iterate over all results
	for {
		dataset, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("ListGlossaries.Next: %v", err)
		}

		fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
		fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
		fmt.Fprintf(w, "Dataset create time:\n")
		fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
		fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

		// Vision classification
		if metadata := dataset.GetImageClassificationDatasetMetadata(); metadata != nil {
			fmt.Fprintf(w, "Image classification dataset metadata: %v\n", metadata)
		}

	}

	return nil
}

Java

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.ListDatasetsRequest;
import com.google.cloud.automl.v1.LocationName;
import java.io.IOException;

class ListDatasets {

  static void listDatasets() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    listDatasets(projectId);
  }

  // List the datasets
  static void listDatasets(String projectId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // A resource that represents Google Cloud Platform location.
      LocationName projectLocation = LocationName.of(projectId, "us-central1");
      ListDatasetsRequest request =
          ListDatasetsRequest.newBuilder().setParent(projectLocation.toString()).build();

      // List all the datasets available in the region by applying filter.
      System.out.println("List of datasets:");
      for (Dataset dataset : client.listDatasets(request).iterateAll()) {
        // Display the dataset information
        System.out.format("\nDataset name: %s\n", dataset.getName());
        // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
        // required for other methods.
        // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
        String[] names = dataset.getName().split("/");
        String retrievedDatasetId = names[names.length - 1];
        System.out.format("Dataset id: %s\n", retrievedDatasetId);
        System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
        System.out.println("Dataset create time:");
        System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
        System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
        System.out.format(
            "Image classification dataset metadata: %s\n",
            dataset.getImageClassificationDatasetMetadata());
      }
    }
  }
}

Node.js

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function listDatasets() {
  // Construct request
  const request = {
    parent: client.locationPath(projectId, location),
    filter: 'translation_dataset_metadata:*',
  };

  const [response] = await client.listDatasets(request);

  console.log('List of datasets:');
  for (const dataset of response) {
    console.log(`Dataset name: ${dataset.name}`);
    console.log(
      `Dataset id: ${
        dataset.name.split('/')[dataset.name.split('/').length - 1]
      }`
    );
    console.log(`Dataset display name: ${dataset.displayName}`);
    console.log('Dataset create time');
    console.log(`\tseconds ${dataset.createTime.seconds}`);
    console.log(`\tnanos ${dataset.createTime.nanos / 1e9}`);
    console.log(
      `Image classification dataset metadata: ${dataset.imageClassificationDatasetMetadata}`
    );
  }
}

listDatasets();

PHP

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';

$client = new AutoMlClient();

try {
    // resource that represents Google Cloud Platform location
    $formattedParent = $client->locationName(
        $projectId,
        $location
    );

    // list all datasets available in the region
    $filter = '';
    $pagedResponse = $client->listDatasets($formattedParent, $filter);

    print('List of datasets' . PHP_EOL);
    foreach ($pagedResponse->iteratePages() as $page) {
        foreach ($page as $dataset) {
            // display dataset information
            $splitName = explode('/', $dataset->getName());
            printf('Dataset name: %s' . PHP_EOL, $dataset->getName());
            printf('Dataset id: %s' . PHP_EOL, end($splitName));
            printf('Dataset display name: %s' . PHP_EOL, $dataset->getDisplayName());
            printf('Dataset create time' . PHP_EOL);
            printf('seconds: %d' . PHP_EOL, $dataset->getCreateTime()->getSeconds());
            printf('nanos : %d' . PHP_EOL, $dataset->getCreateTime()->getNanos());
            printf('Image classification dataset metadata: %s' . PHP_EOL, $dataset->getImageClassificationDatasetMetadata());
        }
    }
} finally {
    $client->close();
}

Python

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"

client = automl.AutoMlClient()
# A resource that represents Google Cloud Platform location.
project_location = client.location_path(project_id, "us-central1")

# List all the datasets available in the region.
response = client.list_datasets(project_location, "")

print("List of datasets:")
for dataset in response:
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))
    print(
        "Image classification dataset metadata: {}".format(
            dataset.image_classification_dataset_metadata
        )
    )

Ruby

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"

client = Google::Cloud::AutoML::AutoML.new

# A resource that represents Google Cloud Platform location.
project_location = client.class.location_path project_id, "us-central1"

# List all the datasets available in the region.
datasets = client.list_datasets project_location

puts "List of datasets:"

datasets.each do |dataset|
  puts "Dataset name: #{dataset.name}"
  puts "Dataset id: #{dataset.name.split('/').last}"
  puts "Dataset display name: #{dataset.display_name}"
  puts "Dataset create time: #{dataset.create_time.to_time}"
  puts "Image classification dataset metadata: #{dataset.image_classification_dataset_metadata}"
end

获取数据集

您还可以使用数据集 ID 获取特定的数据集。

网页界面

要使用 AutoML Vision 界面查看可用数据集列表,请点击左侧导航菜单顶部的数据集链接。

数据集列表页面

要查看其他项目的数据集,请从标题栏左侧的下拉列表中选择该项目。

从列表中选择特定数据集的名称,即可访问该数据集。

REST 和命令行

在使用下面的任何请求数据之前,请先进行以下替换:

  • project-id:您的 GCP 项目 ID。
  • dataset-id:您的数据集的 ID。此 ID 是数据集名称的最后一个元素。例如:
    • 数据集名称:projects/project-id/locations/location-id/datasets/3104518874390609379
    • 数据集 ID:3104518874390609379

HTTP 方法和网址:

GET https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

如需发送请求,请选择以下方式之一:

curl

执行以下命令:

curl -X GET \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

PowerShell

执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method GET `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id" | Select-Object -Expand Content

您应该收到类似以下内容的 JSON 响应:



    {
  "name": "projects/project-id/locations/us-central1/datasets/dataset-id",
  "displayName": "display-name",
  "createTime": "2019-10-29T17:31:12.010290Z",
  "etag": "AB3BwFoP09ffuRNnaWMx4UGi8uvYFctvOBjns84OercuMRIdXr0YINNiUqeW85SB3g4=",
  "exampleCount": 3667,
  "imageClassificationDatasetMetadata": {
    "classificationType": "MULTICLASS"
  }
}

C#

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/// <summary>
/// Demonstrates using the AutoML client to get a dataset by ID.
/// </summary>
/// <param name="projectId">GCP Project ID.</param>
/// <param name="datasetId">the Id of the dataset.</param>
public static object GetDataset(string projectId = "YOUR-PROJECT-ID",
    string datasetId = "YOUR-DATASET-ID")
{
    // Initialize the client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    AutoMlClient client = AutoMlClient.Create();

    // Get the full path of the model.
    DatasetName datasetName = new DatasetName(projectId, "us-central1", datasetId);
    Console.WriteLine(datasetName);
    Dataset dataset = client.GetDataset(datasetName);

    // Display the dataset information
    Console.WriteLine($"Dataset name: {dataset.Name}");
    Console.WriteLine($"Dataset id: {dataset.DatasetName.DatasetId}");
    Console.WriteLine($"Dataset display name: { dataset.DisplayName}");
    Console.WriteLine("Dataset create time:");
    Console.WriteLine($"\tseconds: {dataset.CreateTime.Seconds}");
    Console.WriteLine($"\tnanos: {dataset.CreateTime.Nanos}");
    Console.WriteLine(
        $"Image classification dataset metadata: {dataset.ImageClassificationDatasetMetadata}");

}

Go

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// getDataset gets a dataset.
func getDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.GetDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	dataset, err := client.GetDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %v", err)
	}

	fmt.Fprintf(w, "Dataset name: %v\n", dataset.GetName())
	fmt.Fprintf(w, "Dataset display name: %v\n", dataset.GetDisplayName())
	fmt.Fprintf(w, "Dataset create time:\n")
	fmt.Fprintf(w, "\tseconds: %v\n", dataset.GetCreateTime().GetSeconds())
	fmt.Fprintf(w, "\tnanos: %v\n", dataset.GetCreateTime().GetNanos())

	// Vision classification
	if metadata := dataset.GetImageClassificationDatasetMetadata(); metadata != nil {
		fmt.Fprintf(w, "Image classification dataset metadata: %v\n", metadata)
	}

	return nil
}

Java

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.Dataset;
import com.google.cloud.automl.v1.DatasetName;
import java.io.IOException;

class GetDataset {

  static void getDataset() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    getDataset(projectId, datasetId);
  }

  // Get a dataset
  static void getDataset(String projectId, String datasetId) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Dataset dataset = client.getDataset(datasetFullId);

      // Display the dataset information
      System.out.format("Dataset name: %s\n", dataset.getName());
      // To get the dataset id, you have to parse it out of the `name` field. As dataset Ids are
      // required for other methods.
      // Name Form: `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}`
      String[] names = dataset.getName().split("/");
      String retrievedDatasetId = names[names.length - 1];
      System.out.format("Dataset id: %s\n", retrievedDatasetId);
      System.out.format("Dataset display name: %s\n", dataset.getDisplayName());
      System.out.println("Dataset create time:");
      System.out.format("\tseconds: %s\n", dataset.getCreateTime().getSeconds());
      System.out.format("\tnanos: %s\n", dataset.getCreateTime().getNanos());
      System.out.format(
          "Image classification dataset metadata: %s\n",
          dataset.getImageClassificationDatasetMetadata());
    }
  }
}

Node.js

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function getDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [response] = await client.getDataset(request);

  console.log(`Dataset name: ${response.name}`);
  console.log(
    `Dataset id: ${
      response.name.split('/')[response.name.split('/').length - 1]
    }`
  );
  console.log(`Dataset display name: ${response.displayName}`);
  console.log('Dataset create time');
  console.log(`\tseconds ${response.createTime.seconds}`);
  console.log(`\tnanos ${response.createTime.nanos / 1e9}`);
  console.log(
    `Image classification dataset metadata: ${response.imageClassificationDatasetMetadata}`
  );
}

getDataset();

PHP

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    $dataset = $client->getDataset($formattedName);

    // display dataset information
    $splitName = explode('/', $dataset->getName());
    printf('Dataset name: %s' . PHP_EOL, $dataset->getName());
    printf('Dataset id: %s' . PHP_EOL, end($splitName));
    printf('Dataset display name: %s' . PHP_EOL, $dataset->getDisplayName());
    printf('Dataset create time' . PHP_EOL);
    printf('seconds: %d' . PHP_EOL, $dataset->getCreateTime()->getSeconds());
    printf('nanos : %d' . PHP_EOL, $dataset->getCreateTime()->getNanos());
    printf('Image classification dataset metadata: %s' . PHP_EOL, $dataset->getImageClassificationDatasetMetadata());
} finally {
    $client->close();
}

Python

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)
dataset = client.get_dataset(dataset_full_id)

# Display the dataset information
print("Dataset name: {}".format(dataset.name))
print("Dataset id: {}".format(dataset.name.split("/")[-1]))
print("Dataset display name: {}".format(dataset.display_name))
print("Dataset create time:")
print("\tseconds: {}".format(dataset.create_time.seconds))
print("\tnanos: {}".format(dataset.create_time.nanos))
print(
    "Image classification dataset metadata: {}".format(
        dataset.image_classification_dataset_metadata
    )
)

Ruby

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"

client = Google::Cloud::AutoML::AutoML.new
# Get the full path of the dataset
dataset_full_id = client.class.dataset_path project_id, "us-central1", dataset_id
dataset = client.get_dataset dataset_full_id

# Display the dataset information
puts "Dataset name: #{dataset.name}"
puts "Dataset id: #{dataset.name.split('/').last}"
puts "Dataset display name: #{dataset.display_name}"
puts "Dataset create time: #{dataset.create_time.to_time}"
puts "Image classification dataset metadata: #{dataset.image_classification_dataset_metadata}"

导出数据集

您可以将包含数据集的所有信息的 CSV 文件导出到 Google Cloud Storage 存储分区。

网页界面

要导出非空数据集,请完成以下步骤:

  1. 数据集页面中,选择非空数据集。

    数据集列表页面

    选择非空数据集,即可进入数据集详情 (Dataset details) 页面。

    “为训练图片添加标签”界面

  2. 选择“数据集详情”页面顶部的导出数据选项。

    “导出数据集”按钮

    此操作会打开一个窗口,供您选择 Google Cloud Storage 存储分区位置,或创建新存储分区并将其指定为 CSV 文件的存储位置。

    标签统计信息弹出式窗口

  3. 在您选择了新的或现有的 Google Cloud Storage 存储分区位置后,选择导出 CSV

    “导出 CSV”按钮

  4. 数据导出过程完成后,您会收到电子邮件通知。

REST 和命令行

在使用下面的任何请求数据之前,请先进行以下替换:

  • project-id:您的 GCP 项目 ID。
  • dataset-id:您的数据集的 ID。此 ID 是数据集名称的最后一个元素。例如:
    • 数据集名称:projects/project-id/locations/location-id/datasets/3104518874390609379
    • 数据集 ID:3104518874390609379
  • output-storage-bucket:用于保存输出文件的 Google Cloud Storage 存储分区/目录,采用以下格式表示:gs://bucket/directory/。 发出请求的用户必须具有相应存储分区的写入权限。

HTTP 方法和网址:

POST https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData

请求 JSON 正文:

{
  "outputConfig": {
    "gcsDestination": {
      "outputUriPrefix": "cloud-storage-bucket"
    }
  }
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id:exportData" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。可以使用操作 ID 来获取任务的状态。如需查看示例,请参阅处理长时间运行的操作

{
  "name": "projects/project-id/locations/us-central1/operations/operation-id",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-08-12T18:52:31.637075Z",
    "updateTime": "2019-08-12T18:52:31.637075Z",
    "exportDataDetails": {
      "outputInfo": {
        "gcsOutputDirectory": "cloud-storage-bucket/export_data-dataset-name-timestamp-of-export-call/"
      }
    }
  }
}

C#

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/// <summary>
/// Demonstrates using the AutoML client to export a dataset to a Google Cloud Storage bucket.
/// </summary>
/// <param name="projectId">GCP Project ID.</param>
/// <param name="datasetId">the Id of the dataset.</param>
/// <param name="gcsUri">the Destination URI (Google Cloud Storage).</param>
public static object ExportDataset(string projectId = "YOUR-PROJECT-ID",
    string datasetId = "YOUR-DATASET-ID", string gcsUri = "gs://BUCKET_ID/path_to_export/")
{
    // Initialize the client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    AutoMlClient client = AutoMlClient.Create();

    // Get the complete path of the dataset.
    DatasetName datasetFullId = new DatasetName(projectId, "us-central1", datasetId);
    GcsDestination gcsDestination = new GcsDestination
    {
        OutputUriPrefix = gcsUri
    };

    // Export the dataset to the output URI.
    OutputConfig outputConfig = new OutputConfig
    {
        GcsDestination = gcsDestination
    };

    var result = Task.Run(() => client.ExportDataAsync(datasetFullId, outputConfig)).Result;
    Console.WriteLine("Processing export...");
    result.PollUntilCompleted();
    Console.WriteLine($"Dataset exported.");
    return 0;
}

Java

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.cloud.automl.v1.GcsDestination;
import com.google.cloud.automl.v1.OutputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class ExportDataset {

  static void exportDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsUri = "gs://BUCKET_ID/path_to_export/";
    exportDataset(projectId, datasetId, gcsUri);
  }

  // Export a dataset to a GCS bucket
  static void exportDataset(String projectId, String datasetId, String gcsUri)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      GcsDestination gcsDestination =
          GcsDestination.newBuilder().setOutputUriPrefix(gcsUri).build();

      // Export the dataset to the output URI.
      OutputConfig outputConfig =
          OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

      System.out.println("Processing export...");
      Empty response = client.exportDataAsync(datasetFullId, outputConfig).get();
      System.out.format("Dataset exported. %s\n", response);
    }
  }
}

Node.js

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';
// const gcsUri = 'gs://BUCKET_ID/path_to_export/';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function exportDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
    outputConfig: {
      gcsDestination: {
        outputUriPrefix: gcsUri,
      },
    },
  };

  const [operation] = await client.exportData(request);
  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset exported: ${response}`);
}

exportDataset();

PHP

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

use Google\Cloud\AutoMl\V1\AutoMlClient;
use Google\Cloud\AutoMl\V1\GcsDestination;
use Google\Cloud\AutoMl\V1\OutputConfig;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';
// $gcsUri = 'gs://BUCKET_ID/path_to_export/';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    // set GCS uri
    $gcsDestination = (new GcsDestination())
        ->setInputUri($gcsUri);
    $outputConfig = (new OutputConfig())
        ->setGcsDestination($gcsDestination);

    $operationResponse = $client->exportData($formattedName, $outputConfig);
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $result = $operationResponse->getResult();
        printf('Dataset exported.' . PHP_EOL);
    } else {
        $error = $operationResponse->getError();
        // handleError($error)
    }
} finally {
    $client->close();
}

Python

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"
# gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

client = automl.AutoMlClient()

# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)

gcs_destination = automl.types.GcsDestination(output_uri_prefix=gcs_uri)
output_config = automl.types.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(dataset_full_id, output_config)
print("Dataset exported. {}".format(response.result()))

Ruby

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"
gcs_uri = "gs://BUCKET_ID/path_to_export/"

client = Google::Cloud::AutoML::AutoML.new

# Get the full path of the dataset
dataset_full_id = client.class.dataset_path project_id, "us-central1", dataset_id
output_config = {
  gcs_destination: {
    output_uri_prefix: gcs_uri
  }
}

operation = client.export_data dataset_full_id, output_config

# Wait until the long running operation is done
operation.wait_until_done!

puts "Dataset exported."

导出的 CSV 格式

导出的 CSV 文件包含与训练数据导入 CSV 相同的格式:

set,path,label0[,label1,label2,...]

此 CSV 文件保存在已创建的导出文件夹中,该文件夹通过唯一的时间戳进行区分。以下是导出的 CSV 文件中的一些示例行:

my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/image_classification_1.csv

TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img874.jpg,dandelion
VALIDATION,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img447.jpg,roses
TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img672.jpg,dandelion
VALIDATION,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img421.jpg,sunflowers
TRAIN,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img495.jpg,tulips
TEST,gs://my-storage-bucket/export_data-my_dataset_name-2019-11-08T22:28:13.081Z/files/img014.jpg,sunflowers

删除数据集

您可以在界面中删除不再需要的数据集,或者通过以下代码示例使用数据集的 ID 删除数据集。

网页界面

  1. Vision Dashboard 中,点击左侧导航菜单顶部的数据集链接以显示可用数据集列表。

  2. 点击待删除行最右侧的三点状菜单,然后选择删除数据集

  3. 在确认对话框中点击删除

    数据集列表页面

REST 和命令行

在使用下面的任何请求数据之前,请先进行以下替换:

  • project-id:您的 GCP 项目 ID。
  • dataset-id:您的数据集的 ID。此 ID 是数据集名称的最后一个元素。例如:
    • 数据集名称:projects/project-id/locations/location-id/datasets/3104518874390609379
    • 数据集 ID:3104518874390609379

HTTP 方法和网址:

DELETE https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

如需发送请求,请选择以下方式之一:

curl

执行以下命令:

curl -X DELETE \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id

PowerShell

执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method DELETE `
-Headers $headers `
-Uri "https://automl.googleapis.com/v1/projects/project-id/locations/us-central1/datasets/dataset-id" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。可以使用操作 ID 来获取任务的状态。如需查看示例,请参阅处理长时间运行的操作

{
  "name": "projects/project-id/locations/us-central1/operations/operation-id",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.automl.v1.OperationMetadata",
    "createTime": "2019-11-08T22:37:19.822128Z",
    "updateTime": "2019-11-08T22:37:19.822128Z",
    "deleteDetails": {}
  },
  "done": true,
  "response": {
    "@type": "type.googleapis.com/google.protobuf.Empty"
  }
}

C#

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/// <summary>
/// Deletes a dataset and all of its contents.
/// </summary>
/// <param name="projectId">GCP Project ID.</param>
/// <param name="datasetId">the Id of the dataset.</param>
public static object DeleteDataset(string projectId = "YOUR-PROJECT-ID",
    string datasetId = "YOUR-DATASET-ID")
{
    // Initialize the client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests.
    AutoMlClient client = AutoMlClient.Create();

    // Get the full path of the dataset.
    DatasetName datasetFullId = new DatasetName(projectId, "us-central1", datasetId);
    var result = Task.Run(() => client.DeleteDatasetAsync(datasetFullId)).Result;
    result.PollUntilCompleted();
    Console.WriteLine("Dataset deleted.");

    return 0;
}

Go

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import (
	"context"
	"fmt"
	"io"

	automl "cloud.google.com/go/automl/apiv1"
	automlpb "google.golang.org/genproto/googleapis/cloud/automl/v1"
)

// deleteDataset deletes a dataset.
func deleteDataset(w io.Writer, projectID string, location string, datasetID string) error {
	// projectID := "my-project-id"
	// location := "us-central1"
	// datasetID := "TRL123456789..."

	ctx := context.Background()
	client, err := automl.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %v", err)
	}
	defer client.Close()

	req := &automlpb.DeleteDatasetRequest{
		Name: fmt.Sprintf("projects/%s/locations/%s/datasets/%s", projectID, location, datasetID),
	}

	op, err := client.DeleteDataset(ctx, req)
	if err != nil {
		return fmt.Errorf("DeleteDataset: %v", err)
	}
	fmt.Fprintf(w, "Processing operation name: %q\n", op.Name())

	if err := op.Wait(ctx); err != nil {
		return fmt.Errorf("Wait: %v", err)
	}

	fmt.Fprintf(w, "Dataset deleted.\n")

	return nil
}

Java

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

import com.google.cloud.automl.v1.AutoMlClient;
import com.google.cloud.automl.v1.DatasetName;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.concurrent.ExecutionException;

class DeleteDataset {

  static void deleteDataset() throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    deleteDataset(projectId, datasetId);
  }

  // Delete a dataset
  static void deleteDataset(String projectId, String datasetId)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the full path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);
      Empty response = client.deleteDatasetAsync(datasetFullId).get();
      System.out.format("Dataset deleted. %s\n", response);
    }
  }
}

Node.js

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'us-central1';
// const datasetId = 'YOUR_DATASET_ID';

// Imports the Google Cloud AutoML library
const {AutoMlClient} = require('@google-cloud/automl').v1;

// Instantiates a client
const client = new AutoMlClient();

async function deleteDataset() {
  // Construct request
  const request = {
    name: client.datasetPath(projectId, location, datasetId),
  };

  const [operation] = await client.deleteDataset(request);

  // Wait for operation to complete.
  const [response] = await operation.promise();
  console.log(`Dataset deleted: ${response}`);
}

deleteDataset();

PHP

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

use Google\Cloud\AutoMl\V1\AutoMlClient;

/** Uncomment and populate these variables in your code */
// $projectId = '[Google Cloud Project ID]';
// $location = 'us-central1';
// $datasetId = 'my_dataset_id_123';

$client = new AutoMlClient();

try {
    // get full path of dataset
    $formattedName = $client->datasetName(
        $projectId,
        $location,
        $datasetId
    );

    $operationResponse = $client->deleteDataset($formattedName);
    $operationResponse->pollUntilComplete();
    if ($operationResponse->operationSucceeded()) {
        $result = $operationResponse->getResult();
        printf('Dataset deleted.' . PHP_EOL);
    } else {
        $error = $operationResponse->getError();
        // handleError($error)
    }
} finally {
    $client->close();
}

Python

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

from google.cloud import automl

# TODO(developer): Uncomment and set the following variables
# project_id = "YOUR_PROJECT_ID"
# dataset_id = "YOUR_DATASET_ID"

client = automl.AutoMlClient()
# Get the full path of the dataset
dataset_full_id = client.dataset_path(
    project_id, "us-central1", dataset_id
)
response = client.delete_dataset(dataset_full_id)

print("Dataset deleted. {}".format(response.result()))

Ruby

在试用此示例之前,请按照客户端库页面中与此编程语言对应的设置说明执行操作。

require "google/cloud/automl"

project_id = "YOUR_PROJECT_ID"
dataset_id = "YOUR_DATASET_ID"

client = Google::Cloud::AutoML::AutoML.new

# Get the full path of the dataset
dataset_full_id = client.class.dataset_path project_id, "us-central1", dataset_id

operation = client.delete_dataset dataset_full_id

# Wait until the long running operation is done
operation.wait_until_done!

puts "Dataset deleted."