使用 create_training_pipeline 方法创建基于代管式数据集的自定义训练流水线。
代码示例
Java
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Java 设置说明执行操作。如需了解详情，请参阅《Vertex AI Java API 参考文档》。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅《为本地开发环境设置身份验证》。
import com.google.cloud.aiplatform.v1.GcsDestination;
import com.google.cloud.aiplatform.v1.InputDataConfig;
import com.google.cloud.aiplatform.v1.LocationName;
import com.google.cloud.aiplatform.v1.Model;
import com.google.cloud.aiplatform.v1.ModelContainerSpec;
import com.google.cloud.aiplatform.v1.PipelineServiceClient;
import com.google.cloud.aiplatform.v1.PipelineServiceSettings;
import com.google.cloud.aiplatform.v1.TrainingPipeline;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import java.io.IOException;
/**
 * Sample that creates a Vertex AI training pipeline which runs a custom training container
 * against a managed dataset and registers the trained model for upload.
 */
public class CreateTrainingPipelineCustomTrainingManagedDatasetSample {

  /** Region used by the entry point that does not take an explicit location. */
  private static final String DEFAULT_LOCATION = "us-central1";

  /** Appended to a region name to form that region's API endpoint (host and port). */
  private static final String ENDPOINT_SUFFIX = "-aiplatform.googleapis.com:443";

  /** Schema URI that marks the pipeline's training task as a custom task. */
  private static final String CUSTOM_TASK_DEFINITION =
      "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml";

  /**
   * Runs the sample with placeholder values.
   *
   * @param args unused
   * @throws IOException if the client cannot be created or the request payload cannot be built
   */
  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "PROJECT";
    String displayName = "DISPLAY_NAME";
    String modelDisplayName = "MODEL_DISPLAY_NAME";
    String datasetId = "DATASET_ID";
    String annotationSchemaUri = "ANNOTATION_SCHEMA_URI";
    String trainingContainerSpecImageUri = "TRAINING_CONTAINER_SPEC_IMAGE_URI";
    String modelContainerSpecImageUri = "MODEL_CONTAINER_SPEC_IMAGE_URI";
    String baseOutputUriPrefix = "BASE_OUTPUT_URI_PREFIX";
    createTrainingPipelineCustomTrainingManagedDatasetSample(
        project,
        displayName,
        modelDisplayName,
        datasetId,
        annotationSchemaUri,
        trainingContainerSpecImageUri,
        modelContainerSpecImageUri,
        baseOutputUriPrefix);
  }

  /**
   * Creates the training pipeline in the default region ({@code us-central1}).
   *
   * @param project project that owns the pipeline
   * @param displayName display name of the training pipeline
   * @param modelDisplayName display name of the model to upload
   * @param datasetId ID of the managed dataset used as training input
   * @param annotationSchemaUri annotation schema URI set on the input data config
   * @param trainingContainerSpecImageUri image URI of the training container
   * @param modelContainerSpecImageUri image URI of the container attached to the uploaded model
   * @param baseOutputUriPrefix output URI prefix used for both the base output directory and
   *     the dataset export destination
   * @throws IOException if the client cannot be created or the request payload cannot be built
   */
  static void createTrainingPipelineCustomTrainingManagedDatasetSample(
      String project,
      String displayName,
      String modelDisplayName,
      String datasetId,
      String annotationSchemaUri,
      String trainingContainerSpecImageUri,
      String modelContainerSpecImageUri,
      String baseOutputUriPrefix)
      throws IOException {
    createTrainingPipelineCustomTrainingManagedDatasetSample(
        project,
        displayName,
        modelDisplayName,
        datasetId,
        annotationSchemaUri,
        trainingContainerSpecImageUri,
        modelContainerSpecImageUri,
        baseOutputUriPrefix,
        DEFAULT_LOCATION);
  }

  /**
   * Creates the training pipeline in the given region.
   *
   * <p>The API endpoint is derived from {@code location}, so the endpoint and the parent
   * resource name always refer to the same region.
   *
   * @param project project that owns the pipeline
   * @param displayName display name of the training pipeline
   * @param modelDisplayName display name of the model to upload
   * @param datasetId ID of the managed dataset used as training input
   * @param annotationSchemaUri annotation schema URI set on the input data config
   * @param trainingContainerSpecImageUri image URI of the training container
   * @param modelContainerSpecImageUri image URI of the container attached to the uploaded model
   * @param baseOutputUriPrefix output URI prefix used for both the base output directory and
   *     the dataset export destination
   * @param location region to create the pipeline in, for example {@code us-central1}
   * @throws IllegalArgumentException if {@code location} is null or empty
   * @throws IOException if the client cannot be created or the request payload cannot be built
   */
  static void createTrainingPipelineCustomTrainingManagedDatasetSample(
      String project,
      String displayName,
      String modelDisplayName,
      String datasetId,
      String annotationSchemaUri,
      String trainingContainerSpecImageUri,
      String modelContainerSpecImageUri,
      String baseOutputUriPrefix,
      String location)
      throws IOException {
    if (location == null || location.isEmpty()) {
      throw new IllegalArgumentException("location must be a non-empty region name");
    }

    PipelineServiceSettings settings =
        PipelineServiceSettings.newBuilder().setEndpoint(location + ENDPOINT_SUFFIX).build();

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. try-with-resources calls "close" on the
    // client so any remaining background resources are cleaned up.
    try (PipelineServiceClient client = PipelineServiceClient.create(settings)) {
      TrainingPipeline trainingPipeline =
          TrainingPipeline.newBuilder()
              .setDisplayName(displayName)
              .setInputDataConfig(
                  buildInputDataConfig(datasetId, annotationSchemaUri, baseOutputUriPrefix))
              .setTrainingTaskDefinition(CUSTOM_TASK_DEFINITION)
              .setTrainingTaskInputs(
                  buildTrainingTaskInputs(trainingContainerSpecImageUri, baseOutputUriPrefix))
              .setModelToUpload(buildModelToUpload(modelDisplayName, modelContainerSpecImageUri))
              .build();

      LocationName parent = LocationName.of(project, location);
      TrainingPipeline response = client.createTrainingPipeline(parent, trainingPipeline);
      System.out.format("response: %s\n", response);
      System.out.format("Name: %s\n", response.getName());
    }
  }

  /** Builds the training_task_inputs payload: one worker pool plus the base output directory. */
  private static Value buildTrainingTaskInputs(
      String trainingContainerSpecImageUri, String baseOutputUriPrefix) throws IOException {
    // AIP_MODEL_DIR is set by the service according to baseOutputDirectory.
    JsonArray containerArgs = new JsonArray();
    containerArgs.add("--model-dir=$(AIP_MODEL_DIR)");

    JsonObject containerSpec = new JsonObject();
    containerSpec.addProperty("imageUri", trainingContainerSpecImageUri);
    containerSpec.add("args", containerArgs);

    JsonObject machineSpec = new JsonObject();
    machineSpec.addProperty("machineType", "n1-standard-8");

    JsonObject workerPoolSpec = new JsonObject();
    workerPoolSpec.addProperty("replicaCount", 1);
    workerPoolSpec.add("machineSpec", machineSpec);
    workerPoolSpec.add("containerSpec", containerSpec);

    JsonArray workerPoolSpecs = new JsonArray();
    workerPoolSpecs.add(workerPoolSpec);

    JsonObject baseOutputDirectory = new JsonObject();
    baseOutputDirectory.addProperty("outputUriPrefix", baseOutputUriPrefix);

    JsonObject taskInputs = new JsonObject();
    taskInputs.add("workerPoolSpecs", workerPoolSpecs);
    taskInputs.add("baseOutputDirectory", baseOutputDirectory);

    // The pipeline takes its task inputs as a protobuf Value, so the JSON text is parsed into one.
    Value.Builder valueBuilder = Value.newBuilder();
    JsonFormat.parser().merge(taskInputs.toString(), valueBuilder);
    return valueBuilder.build();
  }

  /** Builds the input_data_config that points the pipeline at the managed dataset. */
  private static InputDataConfig buildInputDataConfig(
      String datasetId, String annotationSchemaUri, String baseOutputUriPrefix) {
    GcsDestination gcsDestination =
        GcsDestination.newBuilder().setOutputUriPrefix(baseOutputUriPrefix).build();
    return InputDataConfig.newBuilder()
        .setDatasetId(datasetId)
        .setAnnotationSchemaUri(annotationSchemaUri)
        .setGcsDestination(gcsDestination)
        .build();
  }

  /** Builds the model_to_upload description: a display name plus its container image. */
  private static Model buildModelToUpload(
      String modelDisplayName, String modelContainerSpecImageUri) {
    ModelContainerSpec containerSpec =
        ModelContainerSpec.newBuilder().setImageUri(modelContainerSpecImageUri).build();
    return Model.newBuilder()
        .setDisplayName(modelDisplayName)
        .setContainerSpec(containerSpec)
        .build();
  }
}
Python
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Python 设置说明执行操作。如需了解详情，请参阅《Vertex AI Python API 参考文档》。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅《为本地开发环境设置身份验证》。
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
def create_training_pipeline_custom_training_managed_dataset_sample(
    project: str,
    display_name: str,
    model_display_name: str,
    dataset_id: str,
    annotation_schema_uri: str,
    training_container_spec_image_uri: str,
    model_container_spec_image_uri: str,
    base_output_uri_prefix: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a custom-training pipeline that reads from a managed dataset.

    Builds the pipeline request (input data config, custom task definition,
    training task inputs and the model to upload), sends it with
    ``create_training_pipeline`` and prints the response.

    Args:
        project: Project used in the parent resource name.
        display_name: Display name of the training pipeline.
        model_display_name: Display name of the model to upload.
        dataset_id: ID of the managed dataset used as input.
        annotation_schema_uri: Annotation schema URI for the input data config.
        training_container_spec_image_uri: Image URI of the training container.
        model_container_spec_image_uri: Image URI of the uploaded model's container.
        base_output_uri_prefix: Output URI prefix, used both as the dataset
            export destination and as the base output directory.
        location: Location used in the parent resource name.
        api_endpoint: API endpoint the client connects to.
    """
    # The AI Platform services require regional API endpoints.
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    pipeline_client = aiplatform.gapic.PipelineServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # training_task_inputs: a single worker pool running the training container.
    task_inputs = json_format.ParseDict(
        {
            "workerPoolSpecs": [
                {
                    "replicaCount": 1,
                    "machineSpec": {"machineType": "n1-standard-8"},
                    "containerSpec": {
                        "imageUri": training_container_spec_image_uri,
                        # AIP_MODEL_DIR is set by the service according to baseOutputDirectory.
                        "args": ["--model-dir=$(AIP_MODEL_DIR)"],
                    },
                }
            ],
            "baseOutputDirectory": {"outputUriPrefix": base_output_uri_prefix},
        },
        Value(),
    )

    pipeline_request = {
        "display_name": display_name,
        # input_data_config
        "input_data_config": {
            "dataset_id": dataset_id,
            "annotation_schema_uri": annotation_schema_uri,
            "gcs_destination": {"output_uri_prefix": base_output_uri_prefix},
        },
        # training_task_definition
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml",
        "training_task_inputs": task_inputs,
        # model_to_upload
        "model_to_upload": {
            "display_name": model_display_name,
            "container_spec": {
                "image_uri": model_container_spec_image_uri,
                "command": [],
                "args": [],
            },
        },
    }

    result = pipeline_client.create_training_pipeline(
        parent=f"projects/{project}/locations/{location}",
        training_pipeline=pipeline_request,
    )
    print("response:", result)
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例，请参阅 Google Cloud 示例浏览器。