使用 API 创建数据集

要创建机器学习模型,您必须先有一组用于训练的代表性数据。使用 API(或控制台)创建空数据集并将数据导入数据集。导入数据后,您可以进行修改并开始模型训练。

创建数据集

使用以下示例为您的数据创建数据集。

请在下面选择您的数据类型:

图片

为数据集指定的数据集架构取决于要用于训练的数据类型:图片、表格、文本或视频。单个图片数据集可用于多个目标,例如分类或对象检测。

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:存储数据集的区域。必须是支持数据集资源的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_NAME:数据集的名称。
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets

请求 JSON 正文:

{
  "display_name": "DATASET_NAME",
  "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/image_1.0.0.yaml"
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateDatasetOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-07T21:27:35.964882Z",
      "updateTime": "2020-07-07T21:27:35.964882Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetImageSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";
    createDatasetImageSample(project, datasetDisplayName);
  }

  static void createDatasetImageSample(String project, String datasetDisplayName)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/image_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);
      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(120, TimeUnit.SECONDS);

      System.out.println("Create Image Dataset Response");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
      System.out.format("Create Time: %s\n", datasetResponse.getCreateTime());
      System.out.format("Update Time: %s\n", datasetResponse.getUpdateTime());
      System.out.format("Labels: %s\n", datasetResponse.getLabelsMap());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = "YOUR_DATASTE_DISPLAY_NAME";
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetImage() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/image_1.0.0.yaml',
  };
  const request = {
    parent,
    dataset,
  };

  // Create Dataset Request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation: ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset image response');
  console.log(`Name : ${result.name}`);
  console.log(`Display name : ${result.displayName}`);
  console.log(`Metadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`Metadata : ${JSON.stringify(result.metadata)}`);
  console.log(`Labels : ${JSON.stringify(result.labels)}`);
}
createDatasetImage();

Python

以下示例使用 Python 版 Vertex SDK 创建数据集并导入数据。如果您运行此示例代码,则可以跳过本指南的导入数据部分

此特定示例导入用于单标签分类的数据。如果模型的目标不同,则必须调整代码。

def create_and_import_dataset_image_sample(
    project: str,
    location: str,
    display_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.ImageDataset.create(
        display_name=display_name,
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.image.single_label_classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

表格

创建数据集时,您也会将其与数据源关联。创建数据集所需的代码取决于训练数据是位于 Cloud Storage 还是 BigQuery 中。如果数据源位于其他项目中,请确保设置所需权限

使用 Cloud Storage 中的数据创建数据集

REST 和命令行

您可以使用 datasets.create 方法创建数据集。

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:存储数据集的区域。必须是支持数据集资源的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_NAME:数据集的显示名。
  • METADATA_SCHEMA_URI:目标的架构文件的 URI。
    • 分类:gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml
    • 预测:gs://google-cloud-aiplatform/schema/dataset/metadata/time_series_1.0.0.yaml
    • 回归:gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml
  • URI:包含训练数据的 Cloud Storage 存储分区的路径 (URI)。可以有多个路径。每个 URI 的格式如下:
    gs://GCSprojectId/bucketName/fileName
    
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets

请求 JSON 正文:

{
  "display_name": "DATASET_NAME",
  "metadata_schema_uri": "METADATA_SCHEMA_URI",
  "metadata": {
    "input_config": {
      "gcs_source": {
        "uri": [URI1, URI2, ...]
      }
    }
  }
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets" | Select-Object -Expand Content

您应会收到如下所示的 JSON 响应:

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateDatasetOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-07T21:27:35.964882Z",
      "updateTime": "2020-07-07T21:27:35.964882Z"
    }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetTabularGcsSample {

  public static void main(String[] args)
      throws InterruptedException, ExecutionException, TimeoutException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";
    String gcsSourceUri = "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_gcs_table/file.csv";
    ;
    createDatasetTableGcs(project, datasetDisplayName, gcsSourceUri);
  }

  static void createDatasetTableGcs(String project, String datasetDisplayName, String gcsSourceUri)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    DatasetServiceSettings settings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient = DatasetServiceClient.create(settings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/tables_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);

      String jsonString =
          "{\"input_config\": {\"gcs_source\": {\"uri\": [\"" + gcsSourceUri + "\"]}}}";
      Value.Builder metaData = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, metaData);

      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .setMetadata(metaData)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(300, TimeUnit.SECONDS);

      System.out.println("Create Dataset Table GCS sample");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = 'YOUR_DATASET_DISPLAY_NAME';
// const gcsSourceUri = 'YOUR_GCS_SOURCE_URI';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetTabularGcs() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  const metadata = {
    structValue: {
      fields: {
        inputConfig: {
          structValue: {
            fields: {
              gcsSource: {
                structValue: {
                  fields: {
                    uri: {
                      listValue: {
                        values: [{stringValue: gcsSourceUri}],
                      },
                    },
                  },
                },
              },
            },
          },
        },
      },
    },
  };
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml',
    metadata: metadata,
  };
  const request = {
    parent,
    dataset,
  };

  // Create dataset request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset tabular gcs response');
  console.log(`\tName : ${result.name}`);
  console.log(`\tDisplay name : ${result.displayName}`);
  console.log(`\tMetadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`\tMetadata : ${JSON.stringify(result.metadata)}`);
}
createDatasetTabularGcs();

Python

def create_and_import_dataset_tabular_gcs_sample(
    display_name: str, project: str, location: str, gcs_source: Union[str, List[str]],
):

    aiplatform.init(project=project, location=location)

    dataset = aiplatform.TabularDataset.create(
        display_name=display_name, gcs_source=gcs_source,
    )

    dataset.wait()

    print(f'\tDataset: "{dataset.display_name}"')
    print(f'\tname: "{dataset.resource_name}"')

使用 BigQuery 中的数据创建数据集

REST 和命令行

您可以使用 datasets.create 方法创建数据集。

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:存储数据集的区域。必须是支持数据集资源的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_NAME:数据集的显示名。
  • METADATA_SCHEMA_URI:目标的架构文件的 URI。
    • 分类:gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml
    • 预测:gs://google-cloud-aiplatform/schema/dataset/metadata/time_series_1.0.0.yaml
    • 回归:gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml
  • URI:包含训练数据的 BigQuery 表的路径。在此表单中执行以下操作:
    bq://bqprojectId.bqDatasetId.bqTableId
    
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets

请求 JSON 正文:

{
  "display_name": "DATASET_NAME",
  "metadata_schema_uri": "METADATA_SCHEMA_URI",
  "metadata": {
    "input_config": {
      "bigquery_source" :{
        "uri": "URI
      }
    }
  }
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets" | Select-Object -Expand Content

您应会收到如下所示的 JSON 响应:

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateDatasetOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-07T21:27:35.964882Z",
      "updateTime": "2020-07-07T21:27:35.964882Z"
    }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetTabularBigquerySample {

  public static void main(String[] args)
      throws InterruptedException, ExecutionException, TimeoutException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String bigqueryDisplayName = "YOUR_DATASET_DISPLAY_NAME";
    String bigqueryUri =
        "bq://YOUR_GOOGLE_CLOUD_PROJECT_ID.BIGQUERY_DATASET_ID.BIGQUERY_TABLE_OR_VIEW_ID";
    createDatasetTableBigquery(project, bigqueryDisplayName, bigqueryUri);
  }

  static void createDatasetTableBigquery(
      String project, String bigqueryDisplayName, String bigqueryUri)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    DatasetServiceSettings settings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient = DatasetServiceClient.create(settings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/tables_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);

      String jsonString =
          "{\"input_config\": {\"bigquery_source\": {\"uri\": \"" + bigqueryUri + "\"}}}";
      Value.Builder metaData = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, metaData);

      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(bigqueryDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .setMetadata(metaData)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(300, TimeUnit.SECONDS);

      System.out.println("Create Dataset Table Bigquery sample");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = 'YOUR_DATASET_DISPLAY_NAME';
// const bigquerySourceUri = 'YOUR_BIGQUERY_SOURCE_URI';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetTabularBigquery() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  const metadata = {
    structValue: {
      fields: {
        inputConfig: {
          structValue: {
            fields: {
              bigquerySource: {
                structValue: {
                  fields: {
                    uri: {
                      listValue: {
                        values: [{stringValue: bigquerySourceUri}],
                      },
                    },
                  },
                },
              },
            },
          },
        },
      },
    },
  };
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml',
    metadata: metadata,
  };
  const request = {
    parent,
    dataset,
  };

  // Create dataset request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset tabular bigquery response');
  console.log(`\tName : ${result.name}`);
  console.log(`\tDisplay name : ${result.displayName}`);
  console.log(`\tMetadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`\tMetadata : ${JSON.stringify(result.metadata)}`);
}
createDatasetTabularBigquery();

Python

def create_and_import_dataset_tabular_bigquery_sample(
    display_name: str, project: str, location: str, bigquery_source: str,
):

    aiplatform.init(project=project, location=location)

    dataset = aiplatform.TabularDataset.create(
        display_name=display_name, bigquery_source=bigquery_source,
    )

    dataset.wait()

    print(f'\tDataset: "{dataset.display_name}"')
    print(f'\tname: "{dataset.resource_name}"')

文本

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:存储数据集的区域。必须是支持数据集资源的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_NAME:数据集的名称。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets

请求 JSON 正文:

{
  "display_name": "DATASET_NAME",
  "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml"
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateDatasetOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-07T21:27:35.964882Z",
      "updateTime": "2020-07-07T21:27:35.964882Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetTextSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetDisplayName = "YOUR_DATASET_DISPLAY_NAME";

    createDatasetTextSample(project, datasetDisplayName);
  }

  static void createDatasetTextSample(String project, String datasetDisplayName)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml";

      LocationName locationName = LocationName.of(project, location);
      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(180, TimeUnit.SECONDS);

      System.out.println("Create Text Dataset Response");
      System.out.format("\tName: %s\n", datasetResponse.getName());
      System.out.format("\tDisplay Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("\tMetadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("\tMetadata: %s\n", datasetResponse.getMetadata());
      System.out.format("\tCreate Time: %s\n", datasetResponse.getCreateTime());
      System.out.format("\tUpdate Time: %s\n", datasetResponse.getUpdateTime());
      System.out.format("\tLabels: %s\n", datasetResponse.getLabelsMap());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = "YOUR_DATASTE_DISPLAY_NAME";
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetText() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml',
  };
  const request = {
    parent,
    dataset,
  };

  // Create Dataset Request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation: ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset text response');
  console.log(`Name : ${result.name}`);
  console.log(`Display name : ${result.displayName}`);
  console.log(`Metadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`Metadata : ${JSON.stringify(result.metadata)}`);
  console.log(`Labels : ${JSON.stringify(result.labels)}`);
}
createDatasetText();

Python

以下示例使用 Python 版 Vertex SDK 创建数据集并导入数据。如果您运行此示例代码,则可以跳过本指南的导入数据部分

此特定示例导入用于单标签分类的数据。如果模型的目标不同,则必须调整代码。

def create_and_import_dataset_text_sample(
    project: str,
    location: str,
    display_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset.create(
        display_name=display_name,
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

视频

在下面选择您的目标对应的标签页:

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:存储数据集的区域。必须是支持数据集资源的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_NAME:数据集的名称。
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets

请求 JSON 正文:

{
  "display_name": "DATASET_NAME",
  "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml"
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.CreateDatasetOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-07T21:27:35.964882Z",
      "updateTime": "2020-07-07T21:27:35.964882Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateDatasetVideoSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetVideoDisplayName = "YOUR_DATASET_VIDEO_DISPLAY_NAME";
    createDatasetSample(datasetVideoDisplayName, project);
  }

  static void createDatasetSample(String datasetVideoDisplayName, String project)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);
      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(datasetVideoDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .build();

      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(300, TimeUnit.SECONDS);

      System.out.println("Create Dataset Video Response");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
      System.out.format("Create Time: %s\n", datasetResponse.getCreateTime());
      System.out.format("Update Time: %s\n", datasetResponse.getUpdateTime());
      System.out.format("Labels: %s\n", datasetResponse.getLabelsMap());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetDisplayName = "YOUR_DATASTE_DISPLAY_NAME";
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function createDatasetVideo() {
  // Configure the parent resource
  const parent = `projects/${project}/locations/${location}`;
  // Configure the dataset resource
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml',
  };
  const request = {
    parent,
    dataset,
  };

  // Create Dataset Request
  const [response] = await datasetServiceClient.createDataset(request);
  console.log(`Long running operation: ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Create dataset video response');
  console.log(`Name : ${result.name}`);
  console.log(`Display name : ${result.displayName}`);
  console.log(`Metadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`Metadata : ${JSON.stringify(result.metadata)}`);
  console.log(`Labels : ${JSON.stringify(result.labels)}`);
}
createDatasetVideo();

Python

以下示例使用 Python 版 Vertex SDK 创建数据集并导入数据。如果您运行此示例代码,则可以跳过本指南的导入数据部分

此特定示例导入用于分类的数据。如果模型的目标不同,则必须调整代码。

def create_and_import_dataset_video_sample(
    project: str,
    location: str,
    display_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.VideoDataset.create(
        display_name=display_name,
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.video.classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

导入数据

对于图片、文本和视频数据,创建空数据集后,您可以将数据导入数据集。如果您之前使用 Python 版 Vertex SDK 创建数据集,则可能在创建数据集时已经导入数据。如果是这样,您可以跳过此部分。

您不应将数据导入表格数据集;数据与数据集关联,但不会导入。

请在下面选择您的数据类型:

图片

在数据导入时,您必须根据模型的目标指定具体的架构。对于图片数据,可以使用以下模型目标:

  • 单标签分类:获取图片的单个标签注释。
  • 多标签分类:获取图片的多个标签注释。
  • 对象检测:获取图片中关键对象的边界框和标签注释。

在下面选择您的目标对应的标签页:

单标签分类

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:数据集所在的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备图片数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_single_label_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataImageClassificationSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_image_source/[file.csv/file.jsonl]";
    importDataImageClassificationSample(project, datasetId, gcsSourceUri);
  }

  static void importDataImageClassificationSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "image_classification_single_label_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);

      System.out.format(
          "Import Data Image Classification Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataImageClassification() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_single_label_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Create Import Data Request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation: ${response.name}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data image classification response : \
      ${JSON.stringify(importDataResponse)}`
  );
}
importDataImageClassification();

Python

def image_dataset_import_data_sample(
    project: str, location: str, src_uris: list, import_schema_uri: str, dataset_id: str
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.ImageDataset(dataset_id)

    ds = ds.import_data(
        gcs_source=src_uris, import_schema_uri=import_schema_uri, sync=True
    )

    print(ds.display_name)
    print(ds.name)
    print(ds.resource_name)
    return ds

多标签分类

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:数据集所在的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备图片数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_multi_label_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Python

def image_dataset_import_data_sample(
    project: str, location: str, src_uris: list, import_schema_uri: str, dataset_id: str
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.ImageDataset(dataset_id)

    ds = ds.import_data(
        gcs_source=src_uris, import_schema_uri=import_schema_uri, sync=True
    )

    print(ds.display_name)
    print(ds.name)
    print(ds.resource_name)
    return ds

对象检测

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:数据集所在的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备图片数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/image_bounding_box_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataImageObjectDetectionSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_image_source/[file.csv/file.jsonl]";
    importDataImageObjectDetectionSample(project, datasetId, gcsSourceUri);
  }

  static void importDataImageObjectDetectionSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "image_bounding_box_io_format_1.0.0.yaml";
      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);

      System.out.format(
          "Import Data Image Object Detection Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataImageObjectDetection() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/image_bounding_box_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Create Import Data Request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();

  console.log(
    `Import data image object detection response : \
      ${JSON.stringify(response.result)}`
  );
}
importDataImageObjectDetection();

Python

def image_dataset_import_data_sample(
    project: str, location: str, src_uris: list, import_schema_uri: str, dataset_id: str
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.ImageDataset(dataset_id)

    ds = ds.import_data(
        gcs_source=src_uris, import_schema_uri=import_schema_uri, sync=True
    )

    print(ds.display_name)
    print(ds.name)
    print(ds.resource_name)
    return ds

表格

文本

在下面选择您的目标对应的标签页:

单标签分类

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:将存储数据集的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备文本数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_classification_single_label_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataTextClassificationSingleLabelSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_text_source/[file.csv/file.jsonl]";

    importDataTextClassificationSingleLabelSample(project, datasetId, gcsSourceUri);
  }

  static void importDataTextClassificationSingleLabelSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "text_classification_single_label_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);
      System.out.format(
          "Import Data Text Classification Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataTextClassificationSingleLabel() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/text_classification_single_label_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Import data request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data text classification single label response : \
      ${JSON.stringify(importDataResponse.result)}`
  );
}
importDataTextClassificationSingleLabel();

Python

def import_data_text_classification_single_label(
    project: str,
    location: str,
    dataset: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset(dataset)
    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

多标签分类

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:将存储数据集的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备文本数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_classification_multi_label_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

实体提取

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:将存储数据集的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备文本数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_extraction_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataTextEntityExtractionSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri = "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_text_source/[file.jsonl]";

    importDataTextEntityExtractionSample(project, datasetId, gcsSourceUri);
  }

  static void importDataTextEntityExtractionSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "text_extraction_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);
      System.out.format(
          "Import Data Text Entity Extraction Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataTextEntityExtraction() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/text_extraction_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Import data request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data text entity extraction response  : \
      ${JSON.stringify(importDataResponse.result)}`
  );
}
importDataTextEntityExtraction();

Python

def import_data_text_entity_extraction_sample(
    project: str,
    location: str,
    dataset: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset(dataset)
    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.extraction,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

情感分析

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:将存储数据集的区域。例如 us-central1
  • PROJECT_ID:您的项目 ID。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备文本数据

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_sentiment_io_format_1.0.0.yaml "
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-07-08T20:32:02.543801Z",
      "updateTime": "2020-07-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataTextSentimentAnalysisSample {

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_text_source/[file.csv/file.jsonl]";

    importDataTextSentimentAnalysisSample(project, datasetId, gcsSourceUri);
  }

  static void importDataTextSentimentAnalysisSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "text_sentiment_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);

      List<ImportDataConfig> importDataConfigList =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigList);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());

      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);
      System.out.format(
          "Import Data Text Sentiment Analysis Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataTextSentimentAnalysis() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/text_sentiment_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Import data request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data text sentiment analysis response  : \
      ${JSON.stringify(importDataResponse.result, null, 2)}`
  );
}
importDataTextSentimentAnalysis();

Python

def import_data_text_sentiment_analysis_sample(
    project: str,
    location: str,
    dataset: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset(dataset)
    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.sentiment,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

视频

在下面选择您的目标对应的标签页:

动作识别

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:要存储数据集的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备视频数据
  • OBJECTIVE:指定模型目标,即“classification”、“object_tracking”或“action recognition”。
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/automl_video_OBJECTIVE_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-10-08T20:32:02.543801Z",
      "updateTime": "2020-10-08T20:32:02.543801Z"
    }
  }
}

Java

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;

public class ImportDataVideoActionRecognitionSample {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "PROJECT";
    String datasetId = "DATASET_ID";
    String gcsSourceUri = "GCS_SOURCE_URI";
    importDataVideoActionRecognitionSample(project, datasetId, gcsSourceUri);
  }

  static void importDataVideoActionRecognitionSample(
      String project, String datasetId, String gcsSourceUri)
      throws IOException, ExecutionException, InterruptedException {
    DatasetServiceSettings settings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient client = DatasetServiceClient.create(settings)) {
      GcsSource gcsSource = GcsSource.newBuilder().addUris(gcsSourceUri).build();
      ImportDataConfig importConfig0 =
          ImportDataConfig.newBuilder()
              .setGcsSource(gcsSource)
              .setImportSchemaUri(
                  "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
                      + "video_action_recognition_io_format_1.0.0.yaml")
              .build();
      List<ImportDataConfig> importConfigs = new ArrayList<>();
      importConfigs.add(importConfig0);
      DatasetName name = DatasetName.of(project, location, datasetId);
      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> response =
          client.importDataAsync(name, importConfigs);

      // You can use OperationFuture.getInitialFuture to get a future representing the initial
      // response to the request, which contains information while the operation is in progress.
      System.out.format("Operation name: %s\n", response.getInitialFuture().get().getName());

      // OperationFuture.get() will block until the operation is finished.
      ImportDataResponse importDataResponse = response.get();
      System.out.format("importDataResponse: %s\n", importDataResponse);
    }
  }
}

Python

def import_data_video_action_recognition_sample(
    project: str,
    location: str,
    dataset_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.VideoDataset(dataset_name=dataset_name)

    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.video.action_recognition,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

分类

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:要存储数据集的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备视频数据
  • OBJECTIVE:指定模型目标,即“classification”、“object_tracking”或“action recognition”。
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/automl_video_OBJECTIVE_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-10-08T20:32:02.543801Z",
      "updateTime": "2020-10-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataVideoClassificationSample {

  public static void main(String[] args)
      throws InterruptedException, ExecutionException, TimeoutException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_video_source/[file.csv/file.jsonl]";
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    importDataVideoClassification(gcsSourceUri, project, datasetId);
  }

  static void importDataVideoClassification(String gcsSourceUri, String project, String datasetId)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "video_classification_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);

      DatasetName datasetName = DatasetName.of(project, location, datasetId);
      List<ImportDataConfig> importDataConfigs =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigs);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(1800, TimeUnit.SECONDS);

      System.out.format(
          "Import Data Video Classification Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 * (Not necessary if passing values as arguments)
 */

// const datasetId = 'YOUR_DATASET_ID';
// const gcsSourceUri = 'YOUR_GCS_SOURCE_URI';
// eg. 'gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]'
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataVideoClassification() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/video_classification_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Create Import Data Request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();

  console.log(
    `Import data video classification response : \
      ${JSON.stringify(response.result)}`
  );
}
importDataVideoClassification();

Python

def import_data_video_classification_sample(
    project: str,
    location: str,
    dataset_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.VideoDataset(dataset_name=dataset_name)

    print(ds.display_name)
    print(ds.resource_name)

    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.video.classification,
        sync=sync,
    )

    ds.wait()

    return ds

对象跟踪

REST 和命令行

在使用任何请求数据之前,请先进行以下替换:

  • LOCATION:要存储数据集的区域。例如 us-central1
  • PROJECT:您的项目 ID 或项目编号。
  • DATASET_ID:数据集的 ID。
  • IMPORT_FILE_URI:Cloud Storage 中 CSV 或 JSONL 文件的路径,该文件列出了存储在 Cloud Storage 中用于模型训练的数据项。如需了解导入文件格式和限制,请参阅准备视频数据
  • OBJECTIVE:指定模型目标,即“classification”、“object_tracking”或“action recognition”。
  • PROJECT_NUMBER:您的项目的项目编号(显示在响应中)。

HTTP 方法和网址:

POST https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import

请求 JSON 正文:

{
  "import_configs": [
    {
      "gcs_source": {
        "uris": "IMPORT_FILE_URI"
      },
     "import_schema_uri" : "gs://google-cloud-aiplatform/schema/dataset/ioformat/automl_video_OBJECTIVE_io_format_1.0.0.yaml"
    }
  ]
}

如需发送请求,请选择以下方式之一:

curl

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

curl -X POST \
-H "Authorization: Bearer "$(gcloud auth application-default print-access-token) \
-H "Content-Type: application/json; charset=utf-8" \
-d @request.json \
"https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import"

PowerShell

将请求正文保存在名为 request.json 的文件中,然后执行以下命令:

$cred = gcloud auth application-default print-access-token
$headers = @{ "Authorization" = "Bearer $cred" }

Invoke-WebRequest `
-Method POST `
-Headers $headers `
-ContentType: "application/json; charset=utf-8" `
-InFile request.json `
-Uri "https://LOCATION-aiplatform.googleapis.com/v1/projects/PROJECT/locations/LOCATION/datasets/DATASET_ID:import" | Select-Object -Expand Content

您应该会看到类似如下所示的输出。您可以使用响应中的 OPERATION_ID获取操作的状态

{
  "name": "projects/PROJECT_NUMBER/locations/LOCATION/datasets/DATASET_ID/operations/OPERATION_ID",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.aiplatform.v1.ImportDataOperationMetadata",
    "genericMetadata": {
      "createTime": "2020-10-08T20:32:02.543801Z",
      "updateTime": "2020-10-08T20:32:02.543801Z"
    }
  }
}

Java


import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DatasetName;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.ImportDataConfig;
import com.google.cloud.aiplatform.v1.ImportDataOperationMetadata;
import com.google.cloud.aiplatform.v1.ImportDataResponse;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ImportDataVideoObjectTrackingSample {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    String gcsSourceUri =
        "gs://YOUR_GCS_SOURCE_BUCKET/path_to_your_video_source/[file.csv/file.jsonl]";
    String project = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    importDataVideObjectTracking(gcsSourceUri, project, datasetId);
  }

  static void importDataVideObjectTracking(String gcsSourceUri, String project, String datasetId)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    DatasetServiceSettings datasetServiceSettings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient =
        DatasetServiceClient.create(datasetServiceSettings)) {
      String location = "us-central1";
      String importSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
              + "video_object_tracking_io_format_1.0.0.yaml";

      GcsSource.Builder gcsSource = GcsSource.newBuilder();
      gcsSource.addUris(gcsSourceUri);
      DatasetName datasetName = DatasetName.of(project, location, datasetId);
      List<ImportDataConfig> importDataConfigs =
          Collections.singletonList(
              ImportDataConfig.newBuilder()
                  .setGcsSource(gcsSource)
                  .setImportSchemaUri(importSchemaUri)
                  .build());

      OperationFuture<ImportDataResponse, ImportDataOperationMetadata> importDataResponseFuture =
          datasetServiceClient.importDataAsync(datasetName, importDataConfigs);
      System.out.format(
          "Operation name: %s\n", importDataResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      ImportDataResponse importDataResponse = importDataResponseFuture.get(300, TimeUnit.SECONDS);

      System.out.format(
          "Import Data Video Object Tracking Response: %s\n", importDataResponse.toString());
    }
  }
}

Node.js

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 */

// const datasetId = 'YOUR_DATASET_ID';
// const gcsSourceUri = 'YOUR_GCS_SOURCE_URI';
// eg. 'gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]'
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importDataVideoObjectTracking() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri:
        'gs://google-cloud-aiplatform/schema/dataset/ioformat/video_object_tracking_io_format_1.0.0.yaml',
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Create Import Data Request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation: ${JSON.stringify(response.name)}`);

  // Wait for operation to complete
  const [importDataResponse] = await response.promise();

  console.log(
    `Import data video object tracking response : \
      ${JSON.stringify(importDataResponse)}`
  );
}
importDataVideoObjectTracking();

Python

def import_data_video_object_tracking_sample(
    project: str,
    location: str,
    dataset_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.VideoDataset(dataset_name=dataset_name)

    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.video.object_tracking,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds

获取操作状态

某些请求会启动需要一些时间才能完成的长时间运行的操作。这些请求会返回操作名称,您可以使用该名称查看操作状态或取消操作。Vertex AI 提供辅助方法来调用长时间运行的操作。如需了解详情,请参阅使用长时间运行的操作

后续步骤