Deploy a custom-trained model

Deploys a custom-trained model by using the deploy_model method.

More information

For detailed documentation that includes this code sample, see:

Code sample

Java

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Java API reference documentation.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.DedicatedResources;
import com.google.cloud.aiplatform.v1.DeployModelOperationMetadata;
import com.google.cloud.aiplatform.v1.DeployModelResponse;
import com.google.cloud.aiplatform.v1.DeployedModel;
import com.google.cloud.aiplatform.v1.EndpointName;
import com.google.cloud.aiplatform.v1.EndpointServiceClient;
import com.google.cloud.aiplatform.v1.EndpointServiceSettings;
import com.google.cloud.aiplatform.v1.MachineSpec;
import com.google.cloud.aiplatform.v1.ModelName;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;

public class DeployModelCustomTrainedModelSample {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String project = "PROJECT";
    String endpointId = "ENDPOINT_ID";
    String modelName = "MODEL_NAME";
    String deployedModelDisplayName = "DEPLOYED_MODEL_DISPLAY_NAME";
    deployModelCustomTrainedModelSample(project, endpointId, modelName, deployedModelDisplayName);
  }

  static void deployModelCustomTrainedModelSample(
      String project, String endpointId, String model, String deployedModelDisplayName)
      throws IOException, ExecutionException, InterruptedException {
    EndpointServiceSettings settings =
        EndpointServiceSettings.newBuilder()
            .setEndpoint("us-central1-aiplatform.googleapis.com:443")
            .build();
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (EndpointServiceClient client = EndpointServiceClient.create(settings)) {
      MachineSpec machineSpec = MachineSpec.newBuilder().setMachineType("n1-standard-2").build();
      DedicatedResources dedicatedResources =
          DedicatedResources.newBuilder().setMinReplicaCount(1).setMachineSpec(machineSpec).build();

      String modelName = ModelName.of(project, location, model).toString();
      DeployedModel deployedModel =
          DeployedModel.newBuilder()
              .setModel(modelName)
              .setDisplayName(deployedModelDisplayName)
              // `dedicated_resources` must be used for non-AutoML models
              .setDedicatedResources(dedicatedResources)
              .build();
      // key '0' assigns traffic for the newly deployed model
      // Traffic percentage values must add up to 100
      // Leave dictionary empty if endpoint should not accept any traffic
      Map<String, Integer> trafficSplit = new HashMap<>();
      trafficSplit.put("0", 100);
      EndpointName endpoint = EndpointName.of(project, location, endpointId);
      OperationFuture<DeployModelResponse, DeployModelOperationMetadata> response =
          client.deployModelAsync(endpoint, deployedModel, trafficSplit);

      // You can use OperationFuture.getInitialFuture to get a future representing the initial
      // response to the request, which contains information while the operation is in progress.
      System.out.format("Operation name: %s\n", response.getInitialFuture().get().getName());

      // OperationFuture.get() will block until the operation is finished.
      DeployModelResponse deployModelResponse = response.get();
      System.out.format("deployModelResponse: %s\n", deployModelResponse);
    }
  }
}

Node.js

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Node.js API reference documentation.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 * (Not necessary if passing values as arguments)
 */

// const modelId = "YOUR_MODEL_ID";
// const endpointId = 'YOUR_ENDPOINT_ID';
// const deployedModelDisplayName = 'YOUR_DEPLOYED_MODEL_DISPLAY_NAME';
// const project = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';

const modelName = `projects/${project}/locations/${location}/models/${modelId}`;
const endpoint = `projects/${project}/locations/${location}/endpoints/${endpointId}`;
// Imports the Google Cloud Endpoint Service Client library
const {EndpointServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint:
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};

// Instantiates a client
const endpointServiceClient = new EndpointServiceClient(clientOptions);

async function deployModelCustomTrainedModel() {
  // Configure the parent resource
  // key '0' assigns traffic for the newly deployed model
  // Traffic percentage values must add up to 100
  // Leave dictionary empty if endpoint should not accept any traffic
  const trafficSplit = {0: 100};
  const deployedModel = {
    // format: 'projects/{project}/locations/{location}/models/{model}'
    model: modelName,
    displayName: deployedModelDisplayName,
    // `dedicatedResources` must be used for non-AutoML models
    dedicatedResources: {
      minReplicaCount: 1,
      machineSpec: {
        machineType: 'n1-standard-2',
        // Accelerators can be used only if the model specifies a GPU image.
        // acceleratorType: 'NVIDIA_TESLA_K80',
        // acceleratorCount: 1,
      },
    },
  };
  const request = {
    endpoint,
    deployedModel,
    trafficSplit,
  };

  // Deploy the model by sending the deployModel request
  const [response] = await endpointServiceClient.deployModel(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();
  const result = response.result;

  console.log('Deploy model response');
  const modelDeployed = result.deployedModel;
  console.log(`\t\tId : ${modelDeployed.id}`);
  console.log(modelDeployed);
}
deployModelCustomTrainedModel();

Python

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Python API reference documentation.

from google.cloud import aiplatform

def deploy_model_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    model_name: str,
    deployed_model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)
    deployed_model = {
        # format: 'projects/{project}/locations/{location}/models/{model}'
        "model": model_name,
        "display_name": deployed_model_display_name,
        # `dedicated_resources` must be used for non-AutoML models
        "dedicated_resources": {
            "min_replica_count": 1,
            "machine_spec": {
                "machine_type": "n1-standard-2",
                # Accelerators can be used only if the model specifies a GPU image.
                # 'accelerator_type': aiplatform.gapic.AcceleratorType.NVIDIA_TESLA_K80,
                # 'accelerator_count': 1,
            },
        },
    }
    # key '0' assigns traffic for the newly deployed model
    # Traffic percentage values must add up to 100
    # Leave dictionary empty if endpoint should not accept any traffic
    traffic_split = {"0": 100}
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.deploy_model(
        endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
    )
    print("Long running operation:", response.operation.name)
    deploy_model_response = response.result(timeout=timeout)
    print("deploy_model_response:", deploy_model_response)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.