使用 deploy_model 方法部署模型。
深入探索
如需查看包含此代码示例的详细文档，请参阅以下内容：
代码示例
Java
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Java 设置说明执行操作。如需了解详情，请参阅 Vertex AI Java API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.longrunning.OperationTimedPollAlgorithm;
import com.google.api.gax.retrying.RetrySettings;
import com.google.cloud.aiplatform.v1.AutomaticResources;
import com.google.cloud.aiplatform.v1.DedicatedResources;
import com.google.cloud.aiplatform.v1.DeployModelOperationMetadata;
import com.google.cloud.aiplatform.v1.DeployModelResponse;
import com.google.cloud.aiplatform.v1.DeployedModel;
import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.EndpointServiceClient;
import com.google.cloud.aiplatform.v1.EndpointServiceSettings;
import com.google.cloud.aiplatform.v1.MachineSpec;
import com.google.cloud.aiplatform.v1.ModelName;
import com.google.cloud.aiplatform.v1.stub.EndpointServiceStubSettings;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.threeten.bp.Duration;
/**
 * Sample that deploys an existing Vertex AI model to an existing endpoint and prints the fields
 * of the {@link DeployedModel} returned by the long-running deploy operation.
 */
public class DeployModelSample {

  /** Region used for both the regional API endpoint and the endpoint/model resource names. */
  private static final String LOCATION = "us-central1";

  public static void main(String[] args)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "YOUR_PROJECT_ID";
    String deployedModelDisplayName = "YOUR_DEPLOYED_MODEL_DISPLAY_NAME";
    // This value is passed to EndpointName.of(...) as the endpoint ID component.
    String endpointId = "YOUR_ENDPOINT_ID";
    String modelId = "YOUR_MODEL_ID";
    int timeout = 900;
    deployModelSample(project, deployedModelDisplayName, endpointId, modelId, timeout);
  }

  /**
   * Deploys {@code modelId} to {@code endpointId}, routing 100% of traffic to the new deployment,
   * then prints the resulting deployed model.
   *
   * @param project project that owns both the endpoint and the model
   * @param deployedModelDisplayName display name to give the new deployment
   * @param endpointId ID of the target endpoint in {@code us-central1}
   * @param modelId ID of the model in {@code us-central1}
   * @param timeout number of seconds to keep polling and waiting for the deploy operation
   * @throws TimeoutException if the operation result is not available within {@code timeout}
   *     seconds
   */
  static void deployModelSample(
      String project,
      String deployedModelDisplayName,
      String endpointId,
      String modelId,
      int timeout)
      throws IOException, InterruptedException, ExecutionException, TimeoutException {
    EndpointServiceSettings endpointServiceSettings = buildEndpointServiceSettings(timeout);

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. The try-with-resources statement closes it
    // once the block exits.
    try (EndpointServiceClient endpointServiceClient =
        EndpointServiceClient.create(endpointServiceSettings)) {
      EndpointName endpointName = EndpointName.of(project, LOCATION, endpointId);

      // key '0' assigns traffic for the newly deployed model
      // Traffic percentage values must add up to 100
      // Leave dictionary empty if endpoint should not accept any traffic
      Map<String, Integer> trafficSplit = new HashMap<>();
      trafficSplit.put("0", 100);

      ModelName modelName = ModelName.of(project, LOCATION, modelId);
      AutomaticResources automaticResourcesInput =
          AutomaticResources.newBuilder().setMinReplicaCount(1).setMaxReplicaCount(1).build();
      DeployedModel deployedModelInput =
          DeployedModel.newBuilder()
              .setModel(modelName.toString())
              .setDisplayName(deployedModelDisplayName)
              .setAutomaticResources(automaticResourcesInput)
              .build();

      OperationFuture<DeployModelResponse, DeployModelOperationMetadata> deployModelResponseFuture =
          endpointServiceClient.deployModelAsync(endpointName, deployedModelInput, trafficSplit);
      System.out.format(
          "Operation name: %s\n", deployModelResponseFuture.getInitialFuture().get().getName());
      System.out.println("Waiting for operation to finish...");

      // Wait for as long as the caller asked for. A fixed 20-minute wait here would cut off
      // callers that pass a longer timeout even though polling is still allowed to continue.
      DeployModelResponse deployModelResponse =
          deployModelResponseFuture.get(timeout, TimeUnit.SECONDS);

      System.out.println("Deploy Model Response");
      printDeployedModel(deployModelResponse.getDeployedModel());
    }
  }

  /**
   * Builds client settings that target the regional endpoint for {@link #LOCATION} and poll the
   * deploy-model operation for at most {@code timeout} seconds.
   */
  private static EndpointServiceSettings buildEndpointServiceSettings(int timeout)
      throws IOException {
    // Set long-running operations (LROs) timeout
    final OperationTimedPollAlgorithm operationTimedPollAlgorithm =
        OperationTimedPollAlgorithm.create(
            RetrySettings.newBuilder()
                .setInitialRetryDelay(Duration.ofMillis(5000L))
                .setRetryDelayMultiplier(1.5)
                .setMaxRetryDelay(Duration.ofMillis(45000L))
                .setInitialRpcTimeout(Duration.ZERO)
                .setRpcTimeoutMultiplier(1.0)
                .setMaxRpcTimeout(Duration.ZERO)
                .setTotalTimeout(Duration.ofSeconds(timeout))
                .build());

    EndpointServiceStubSettings.Builder endpointServiceStubSettingsBuilder =
        EndpointServiceStubSettings.newBuilder();
    endpointServiceStubSettingsBuilder
        .deployModelOperationSettings()
        .setPollingAlgorithm(operationTimedPollAlgorithm);
    EndpointServiceStubSettings endpointStubSettings = endpointServiceStubSettingsBuilder.build();

    EndpointServiceSettings endpointServiceSettings =
        EndpointServiceSettings.create(endpointStubSettings);
    return endpointServiceSettings.toBuilder()
        .setEndpoint(LOCATION + "-aiplatform.googleapis.com:443")
        .build();
  }

  /** Prints the identifying fields and the resource configuration of a deployed model. */
  private static void printDeployedModel(DeployedModel deployedModel) {
    System.out.println("\tDeployed Model");
    System.out.format("\t\tid: %s\n", deployedModel.getId());
    System.out.format("\t\tmodel: %s\n", deployedModel.getModel());
    System.out.format("\t\tDisplay Name: %s\n", deployedModel.getDisplayName());
    System.out.format("\t\tCreate Time: %s\n", deployedModel.getCreateTime());

    DedicatedResources dedicatedResources = deployedModel.getDedicatedResources();
    System.out.println("\t\tDedicated Resources");
    System.out.format("\t\t\tMin Replica Count: %s\n", dedicatedResources.getMinReplicaCount());

    MachineSpec machineSpec = dedicatedResources.getMachineSpec();
    System.out.println("\t\t\tMachine Spec");
    System.out.format("\t\t\t\tMachine Type: %s\n", machineSpec.getMachineType());
    System.out.format("\t\t\t\tAccelerator Type: %s\n", machineSpec.getAcceleratorType());
    System.out.format("\t\t\t\tAccelerator Count: %s\n", machineSpec.getAcceleratorCount());

    AutomaticResources automaticResources = deployedModel.getAutomaticResources();
    System.out.println("\t\tAutomatic Resources");
    System.out.format("\t\t\tMin Replica Count: %s\n", automaticResources.getMinReplicaCount());
    System.out.format("\t\t\tMax Replica Count: %s\n", automaticResources.getMaxReplicaCount());
  }
}
Node.js
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Node.js 设置说明执行操作。如需了解详情，请参阅 Vertex AI Node.js API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
/**
 * TODO(developer): Uncomment these variables before running the sample.
 * (Not necessary if passing values as arguments)
 */
// const modelId = 'YOUR_MODEL_ID';
// const endpointId = 'YOUR_ENDPOINT_ID';
// const deployedModelDisplayName = 'YOUR_DEPLOYED_MODEL_DISPLAY_NAME';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

// Fully qualified resource names of the model to deploy and the target endpoint.
const modelName = `projects/${project}/locations/${location}/models/${modelId}`;
const endpoint = `projects/${project}/locations/${location}/endpoints/${endpointId}`;

// Imports the Google Cloud Endpoint Service Client library
const {EndpointServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint. The host is derived from
// `location` so that it always matches the region used in the resource names
// above; for 'us-central1' this is 'us-central1-aiplatform.googleapis.com'.
const clientOptions = {
  apiEndpoint: `${location}-aiplatform.googleapis.com`,
};

// Instantiates a client
const endpointServiceClient = new EndpointServiceClient(clientOptions);
/**
 * Deploys the model identified by `modelName` to `endpoint`, waits for the
 * long-running operation to complete, and logs the deployed model's details.
 * Fields that are absent from the response are logged as `{}` placeholders.
 */
async function deployModel() {
  // Logs the dedicated-resources section, or placeholders when it is absent.
  const logDedicatedResources = dedicated => {
    console.log('\t\tDedicated resources');
    if (!dedicated) {
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMachine spec {}');
      console.log('\t\t\t\tMachine type : {}');
      console.log('\t\t\t\tAccelerator type : {}');
      console.log('\t\t\t\tAccelerator count : {}');
      return;
    }
    console.log(`\t\t\tMin replica count : ${dedicated.minReplicaCount}`);
    const {machineSpec: spec} = dedicated;
    console.log('\t\t\tMachine spec');
    console.log(`\t\t\t\tMachine type : ${spec.machineType}`);
    console.log(`\t\t\t\tAccelerator type : ${spec.acceleratorType}`);
    console.log(`\t\t\t\tAccelerator count : ${spec.acceleratorCount}`);
  };

  // Logs the automatic-resources section, or placeholders when it is absent.
  const logAutomaticResources = automatic => {
    console.log('\t\tAutomatic resources');
    if (!automatic) {
      console.log('\t\t\tMin replica count : {}');
      console.log('\t\t\tMax replica count : {}');
      return;
    }
    console.log(`\t\t\tMin replica count : ${automatic.minReplicaCount}`);
    console.log(`\t\t\tMax replica count : ${automatic.maxReplicaCount}`);
  };

  // Description of the deployment to create on the endpoint.
  const deployment = {
    // format: 'projects/{project}/locations/{location}/models/{model}'
    model: modelName,
    displayName: deployedModelDisplayName,
    // AutoML Vision models require `automatic_resources` field
    // Other model types may require `dedicated_resources` field instead
    automaticResources: {minReplicaCount: 1, maxReplicaCount: 1},
  };

  // Start the deploy-model long-running operation.
  const [operation] = await endpointServiceClient.deployModel({
    endpoint,
    deployedModel: deployment,
    // key '0' assigns traffic for the newly deployed model
    // Traffic percentage values must add up to 100
    // Leave dictionary empty if endpoint should not accept any traffic
    trafficSplit: {0: 100},
  });
  console.log(`Long running operation : ${operation.name}`);

  // Wait for operation to complete
  await operation.promise();
  const outcome = operation.result;

  console.log('Deploy model response');
  const deployed = outcome.deployedModel;
  console.log('\tDeployed model');

  if (deployed) {
    console.log(`\t\tId : ${deployed.id}`);
    console.log(`\t\tModel : ${deployed.model}`);
    console.log(`\t\tDisplay name : ${deployed.displayName}`);
    console.log(`\t\tCreate time : ${deployed.createTime}`);
  } else {
    console.log('\t\tId : {}');
    console.log('\t\tModel : {}');
    console.log('\t\tDisplay name : {}');
    console.log('\t\tCreate time : {}');
  }

  logDedicatedResources(deployed ? deployed.dedicatedResources : undefined);
  logAutomaticResources(deployed ? deployed.automaticResources : undefined);
}
// Kick off the deployment. The promise returned by deployModel() is neither
// awaited nor given a rejection handler at this call site.
deployModel();
Python
在尝试此示例之前，请按照《Vertex AI 快速入门：使用客户端库》中的 Python 设置说明执行操作。如需了解详情，请参阅 Vertex AI Python API 参考文档。
如需向 Vertex AI 进行身份验证，请设置应用默认凭据。如需了解详情，请参阅为本地开发环境设置身份验证。
from google.cloud import aiplatform
def deploy_model_sample(
    project: str,
    endpoint_id: str,
    model_name: str,
    deployed_model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
):
    """Deploy a model to an endpoint and print the operation's outcome.

    Args:
        project: Project used to build the endpoint resource path.
        endpoint_id: ID of the endpoint that receives the deployment.
        model_name: Model resource name, in the form
            'projects/{project}/locations/{location}/models/{model}'.
        deployed_model_display_name: Display name for the new deployment.
        location: Location used to build the endpoint resource path.
        api_endpoint: Host of the regional API endpoint the client talks to.
        timeout: Value passed as ``timeout`` when waiting on the operation
            result.
    """
    # The AI Platform services require regional API endpoints.
    # The client only needs to be created once and can be reused for multiple
    # requests.
    endpoint_client = aiplatform.gapic.EndpointServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    target_endpoint = endpoint_client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )

    operation = endpoint_client.deploy_model(
        endpoint=target_endpoint,
        deployed_model={
            # format: 'projects/{project}/locations/{location}/models/{model}'
            "model": model_name,
            "display_name": deployed_model_display_name,
            # AutoML Vision models require `automatic_resources` field
            # Other model types may require `dedicated_resources` field instead
            "automatic_resources": {"min_replica_count": 1, "max_replica_count": 1},
        },
        # key '0' assigns traffic for the newly deployed model
        # Traffic percentage values must add up to 100
        # Leave dictionary empty if endpoint should not accept any traffic
        traffic_split={"0": 100},
    )
    print(f"Long running operation: {operation.operation.name!s}")

    # Block until the long-running operation finishes.
    outcome = operation.result(timeout=timeout)
    print(f"deploy_model_response: {outcome!s}")
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例，请参阅 Google Cloud 示例浏览器。