使用 create_dataset 方法基于 BigQuery 表创建表格(tabular)数据集。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
Java
在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Java 设置说明执行操作。如需了解详情,请参阅 Vertex AI Java API 参考文档。
如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.CreateDatasetOperationMetadata;
import com.google.cloud.aiplatform.v1.Dataset;
import com.google.cloud.aiplatform.v1.DatasetServiceClient;
import com.google.cloud.aiplatform.v1.DatasetServiceSettings;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
public class CreateDatasetTabularBigquerySample {

  public static void main(String[] args)
      throws InterruptedException, ExecutionException, TimeoutException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String bigqueryDisplayName = "YOUR_DATASET_DISPLAY_NAME";
    String bigqueryUri =
        "bq://YOUR_GOOGLE_CLOUD_PROJECT_ID.BIGQUERY_DATASET_ID.BIGQUERY_TABLE_OR_VIEW_ID";
    createDatasetTableBigquery(project, bigqueryDisplayName, bigqueryUri);
  }

  /**
   * Creates a tabular Vertex AI dataset backed by a BigQuery table or view.
   *
   * @param project the Google Cloud project ID to create the dataset in
   * @param bigqueryDisplayName human-readable display name for the new dataset
   * @param bigqueryUri BigQuery source in the form {@code bq://project.dataset.table_or_view}
   * @throws IOException if the client cannot be created
   * @throws ExecutionException if the create-dataset operation fails
   * @throws InterruptedException if the wait for the operation is interrupted
   * @throws TimeoutException if the operation does not finish within the wait deadline
   */
  static void createDatasetTableBigquery(
      String project, String bigqueryDisplayName, String bigqueryUri)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    // Vertex AI requires a regional endpoint; this sample is pinned to us-central1.
    DatasetServiceSettings settings =
        DatasetServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DatasetServiceClient datasetServiceClient = DatasetServiceClient.create(settings)) {
      String location = "us-central1";
      // Use the tabular dataset schema, consistent with the Node.js and Python samples on this
      // page (the previous "tables_1.0.0.yaml" value is the legacy AutoML Tables schema).
      String metadataSchemaUri =
          "gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml";
      LocationName locationName = LocationName.of(project, location);
      // The BigQuery source is passed as dataset metadata, encoded as a protobuf Value.
      String jsonString =
          "{\"input_config\": {\"bigquery_source\": {\"uri\": \"" + bigqueryUri + "\"}}}";
      Value.Builder metaData = Value.newBuilder();
      JsonFormat.parser().merge(jsonString, metaData);
      Dataset dataset =
          Dataset.newBuilder()
              .setDisplayName(bigqueryDisplayName)
              .setMetadataSchemaUri(metadataSchemaUri)
              .setMetadata(metaData)
              .build();
      // createDatasetAsync returns a long-running operation; block until it completes
      // (up to 300 seconds) and then print the resulting dataset resource.
      OperationFuture<Dataset, CreateDatasetOperationMetadata> datasetFuture =
          datasetServiceClient.createDatasetAsync(locationName, dataset);
      System.out.format("Operation name: %s\n", datasetFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");
      Dataset datasetResponse = datasetFuture.get(300, TimeUnit.SECONDS);
      System.out.println("Create Dataset Table Bigquery sample");
      System.out.format("Name: %s\n", datasetResponse.getName());
      System.out.format("Display Name: %s\n", datasetResponse.getDisplayName());
      System.out.format("Metadata Schema Uri: %s\n", datasetResponse.getMetadataSchemaUri());
      System.out.format("Metadata: %s\n", datasetResponse.getMetadata());
    }
  }
}
Node.js
在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Node.js 设置说明执行操作。如需了解详情,请参阅 Vertex AI Node.js API 参考文档。
如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
/**
 * TODO(developer): Uncomment these variables before running the sample.
 * (Not necessary if passing values as arguments)
 */
// const datasetDisplayName = 'YOUR_DATASET_DISPLAY_NAME';
// const bigquerySourceUri = 'YOUR_BIGQUERY_SOURCE_URI';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Instantiates a client pinned to the regional API endpoint.
const datasetServiceClient = new DatasetServiceClient({
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
});

async function createDatasetTabularBigquery() {
  // Parent resource the dataset will be created under.
  const parent = `projects/${project}/locations/${location}`;

  // Dataset metadata is a google.protobuf.Value; built here piece by piece,
  // it encodes: {inputConfig: {bigquerySource: {uri: [bigquerySourceUri]}}}.
  const bigquerySource = {
    structValue: {
      fields: {
        uri: {
          listValue: {
            values: [{stringValue: bigquerySourceUri}],
          },
        },
      },
    },
  };
  const inputConfig = {
    structValue: {
      fields: {bigquerySource},
    },
  };
  const metadata = {
    structValue: {
      fields: {inputConfig},
    },
  };

  // Dataset resource: display name plus the tabular metadata schema.
  const dataset = {
    displayName: datasetDisplayName,
    metadataSchemaUri:
      'gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml',
    metadata,
  };

  // Kick off the long-running create-dataset operation.
  const [response] = await datasetServiceClient.createDataset({
    parent,
    dataset,
  });
  console.log(`Long running operation : ${response.name}`);

  // Block until the operation completes, then report the created dataset.
  await response.promise();
  const result = response.result;
  console.log('Create dataset tabular bigquery response');
  console.log(`\tName : ${result.name}`);
  console.log(`\tDisplay name : ${result.displayName}`);
  console.log(`\tMetadata schema uri : ${result.metadataSchemaUri}`);
  console.log(`\tMetadata : ${JSON.stringify(result.metadata)}`);
}
createDatasetTabularBigquery();
Python
在尝试此示例之前,请按照《Vertex AI 快速入门:使用客户端库》中的 Python 设置说明执行操作。如需了解详情,请参阅 Vertex AI Python API 参考文档。
如需向 Vertex AI 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
def create_dataset_tabular_bigquery_sample(
    project: str,
    display_name: str,
    bigquery_uri: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 300,
):
    """Create a tabular Vertex AI dataset backed by a BigQuery table.

    Args:
        project: Google Cloud project ID to create the dataset in.
        display_name: Human-readable display name for the new dataset.
        bigquery_uri: BigQuery source, e.g. ``bq://project.dataset.table``.
        location: Region of the dataset; must match the regional endpoint.
        api_endpoint: Regional Vertex AI API endpoint to send requests to.
        timeout: Seconds to wait for the create operation to finish.
    """
    # The AI Platform services require regional API endpoints.
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.DatasetServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )
    # Encode the BigQuery source reference as a protobuf Value for the
    # dataset's metadata field.
    metadata = json_format.ParseDict(
        {"input_config": {"bigquery_source": {"uri": bigquery_uri}}}, Value()
    )
    response = client.create_dataset(
        parent=f"projects/{project}/locations/{location}",
        dataset={
            "display_name": display_name,
            "metadata_schema_uri": "gs://google-cloud-aiplatform/schema/dataset/metadata/tabular_1.0.0.yaml",
            "metadata": metadata,
        },
    )
    print("Long running operation:", response.operation.name)
    # Block until the long-running operation completes (or times out).
    create_dataset_response = response.result(timeout=timeout)
    print("create_dataset_response:", create_dataset_response)
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。