Crea e invia un job che utilizza un bucket Cloud Storage

Creare e inviare un job batch di esempio che esegue uno script che accede a un bucket Cloud Storage esistente. La definizione del job specifica di montare il bucket su tutte le VM in modo che le attività del job possano leggere i dati di input al suo interno o scriverne i risultati. Dopo aver inviato il job, Batch accoda, pianifica ed esegue automaticamente il job sulle VM di Compute Engine.

Per saperne di più

Per la documentazione dettagliata che include questo esempio di codice, consulta quanto segue:

Esempio di codice

Go

Per scoprire di più, consulta la documentazione di riferimento per le API Go in batch.

Per eseguire l'autenticazione in batch, configura le Credenziali predefinite dell'applicazione. Per saperne di più, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

import (
	"context"
	"fmt"
	"io"

	batch "cloud.google.com/go/batch/apiv1"
	batchpb "google.golang.org/genproto/googleapis/cloud/batch/v1"
	durationpb "google.golang.org/protobuf/types/known/durationpb"
)

// Creates and runs a job that executes the specified script
func createScriptJobWithBucket(w io.Writer, projectID, region, jobName, bucketName string) error {
	// projectID := "your_project_id"
	// region := "us-central1"
	// jobName := "some-job"
	// jobName := "some-bucket"

	ctx := context.Background()
	batchClient, err := batch.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("NewClient: %w", err)
	}
	defer batchClient.Close()

	// Define what will be done as part of the job.
	command := &batchpb.Runnable_Script_Text{
		Text: "echo Hello world from task ${BATCH_TASK_INDEX}. >> /mnt/share/output_task_${BATCH_TASK_INDEX}.txt",
	}

	// Specify the Google Cloud Storage bucket to mount
	volume := &batchpb.Volume{
		Source: &batchpb.Volume_Gcs{
			Gcs: &batchpb.GCS{
				RemotePath: bucketName,
			},
		},
		MountPath:    "/mnt/share",
		MountOptions: []string{},
	}

	// We can specify what resources are requested by each task.
	resources := &batchpb.ComputeResource{
		// CpuMilli is milliseconds per cpu-second. This means the task requires 50% of a single CPUs.
		CpuMilli:  500,
		MemoryMib: 16,
	}

	taskSpec := &batchpb.TaskSpec{
		Runnables: []*batchpb.Runnable{{
			Executable: &batchpb.Runnable_Script_{
				Script: &batchpb.Runnable_Script{Command: command},
			},
		}},
		ComputeResource: resources,
		MaxRunDuration: &durationpb.Duration{
			Seconds: 3600,
		},
		MaxRetryCount: 2,
		Volumes:       []*batchpb.Volume{volume},
	}

	// Tasks are grouped inside a job using TaskGroups.
	taskGroups := []*batchpb.TaskGroup{
		{
			TaskCount: 4,
			TaskSpec:  taskSpec,
		},
	}

	// Policies are used to define on what kind of virtual machines the tasks will run on.
	// In this case, we tell the system to use "e2-standard-4" machine type.
	// Read more about machine types here: https://cloud.google.com/compute/docs/machine-types
	allocationPolicy := &batchpb.AllocationPolicy{
		Instances: []*batchpb.AllocationPolicy_InstancePolicyOrTemplate{{
			PolicyTemplate: &batchpb.AllocationPolicy_InstancePolicyOrTemplate_Policy{
				Policy: &batchpb.AllocationPolicy_InstancePolicy{
					MachineType: "e2-standard-4",
				},
			},
		}},
	}

	// We use Cloud Logging as it's an out of the box available option
	logsPolicy := &batchpb.LogsPolicy{
		Destination: batchpb.LogsPolicy_CLOUD_LOGGING,
	}

	jobLabels := map[string]string{"env": "testing", "type": "script"}

	// The job's parent is the region in which the job will run
	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, region)

	job := batchpb.Job{
		TaskGroups:       taskGroups,
		AllocationPolicy: allocationPolicy,
		Labels:           jobLabels,
		LogsPolicy:       logsPolicy,
	}

	req := &batchpb.CreateJobRequest{
		Parent: parent,
		JobId:  jobName,
		Job:    &job,
	}

	created_job, err := batchClient.CreateJob(ctx, req)
	if err != nil {
		return fmt.Errorf("unable to create job: %w", err)
	}

	fmt.Fprintf(w, "Job created: %v\n", created_job)

	return nil
}

Java

Per scoprire di più, consulta la documentazione di riferimento per le API Java in batch.

Per eseguire l'autenticazione in batch, configura le Credenziali predefinite dell'applicazione. Per saperne di più, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.


import com.google.cloud.batch.v1.AllocationPolicy;
import com.google.cloud.batch.v1.AllocationPolicy.InstancePolicy;
import com.google.cloud.batch.v1.AllocationPolicy.InstancePolicyOrTemplate;
import com.google.cloud.batch.v1.BatchServiceClient;
import com.google.cloud.batch.v1.ComputeResource;
import com.google.cloud.batch.v1.CreateJobRequest;
import com.google.cloud.batch.v1.GCS;
import com.google.cloud.batch.v1.Job;
import com.google.cloud.batch.v1.LogsPolicy;
import com.google.cloud.batch.v1.LogsPolicy.Destination;
import com.google.cloud.batch.v1.Runnable;
import com.google.cloud.batch.v1.Runnable.Script;
import com.google.cloud.batch.v1.TaskGroup;
import com.google.cloud.batch.v1.TaskSpec;
import com.google.cloud.batch.v1.Volume;
import com.google.protobuf.Duration;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class CreateWithMountedBucket {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    // TODO(developer): Replace these variables before running the sample.
    // Project ID or project number of the Cloud project you want to use.
    String projectId = "YOUR_PROJECT_ID";

    // Name of the region you want to use to run the job. Regions that are
    // available for Batch are listed on: https://cloud.google.com/batch/docs/get-started#locations
    String region = "europe-central2";

    // The name of the job that will be created.
    // It needs to be unique for each project and region pair.
    String jobName = "JOB_NAME";

    // Name of the bucket to be mounted for your Job.
    String bucketName = "BUCKET_NAME";

    createScriptJobWithBucket(projectId, region, jobName, bucketName);
  }

  // This method shows how to create a sample Batch Job that will run
  // a simple command on Cloud Compute instances.
  public static void createScriptJobWithBucket(String projectId, String region, String jobName,
      String bucketName)
      throws IOException, ExecutionException, InterruptedException, TimeoutException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the `batchServiceClient.close()` method on the client to safely
    // clean up any remaining background resources.
    try (BatchServiceClient batchServiceClient = BatchServiceClient.create()) {

      // Define what will be done as part of the job.
      Runnable runnable =
          Runnable.newBuilder()
              .setScript(
                  Script.newBuilder()
                      .setText(
                          "echo Hello world from task ${BATCH_TASK_INDEX}. >> "
                              + "/mnt/share/output_task_${BATCH_TASK_INDEX}.txt")
                      // You can also run a script from a file. Just remember, that needs to be a
                      // script that's already on the VM that will be running the job.
                      // Using setText() and setPath() is mutually exclusive.
                      // .setPath("/tmp/test.sh")
                      .build())
              .build();

      Volume volume = Volume.newBuilder()
          .setGcs(GCS.newBuilder()
              .setRemotePath(bucketName)
              .build())
          .setMountPath("/mnt/share")
          .build();

      // We can specify what resources are requested by each task.
      ComputeResource computeResource =
          ComputeResource.newBuilder()
              // In milliseconds per cpu-second. This means the task requires 50% of a single CPUs.
              .setCpuMilli(500)
              // In MiB.
              .setMemoryMib(16)
              .build();

      TaskSpec task =
          TaskSpec.newBuilder()
              // Jobs can be divided into tasks. In this case, we have only one task.
              .addRunnables(runnable)
              .addVolumes(volume)
              .setComputeResource(computeResource)
              .setMaxRetryCount(2)
              .setMaxRunDuration(Duration.newBuilder().setSeconds(3600).build())
              .build();

      // Tasks are grouped inside a job using TaskGroups.
      // Currently, it's possible to have only one task group.
      TaskGroup taskGroup = TaskGroup.newBuilder().setTaskCount(4).setTaskSpec(task).build();

      // Policies are used to define on what kind of virtual machines the tasks will run on.
      // In this case, we tell the system to use "e2-standard-4" machine type.
      // Read more about machine types here: https://cloud.google.com/compute/docs/machine-types
      InstancePolicy instancePolicy =
          InstancePolicy.newBuilder().setMachineType("e2-standard-4").build();

      AllocationPolicy allocationPolicy =
          AllocationPolicy.newBuilder()
              .addInstances(InstancePolicyOrTemplate.newBuilder().setPolicy(instancePolicy).build())
              .build();

      Job job =
          Job.newBuilder()
              .addTaskGroups(taskGroup)
              .setAllocationPolicy(allocationPolicy)
              .putLabels("env", "testing")
              .putLabels("type", "script")
              .putLabels("mount", "bucket")
              // We use Cloud Logging as it's an out of the box available option.
              .setLogsPolicy(
                  LogsPolicy.newBuilder().setDestination(Destination.CLOUD_LOGGING).build())
              .build();

      CreateJobRequest createJobRequest =
          CreateJobRequest.newBuilder()
              // The job's parent is the region in which the job will run.
              .setParent(String.format("projects/%s/locations/%s", projectId, region))
              .setJob(job)
              .setJobId(jobName)
              .build();

      Job result =
          batchServiceClient
              .createJobCallable()
              .futureCall(createJobRequest)
              .get(5, TimeUnit.MINUTES);

      System.out.printf("Successfully created the job: %s", result.getName());
    }
  }
}

Node.js

Per scoprire di più, consulta la documentazione di riferimento per le API Node.js in batch.

Per eseguire l'autenticazione in batch, configura le Credenziali predefinite dell'applicazione. Per saperne di più, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

/**
 * TODO(developer): Uncomment and replace these variables before running the sample.
 */
// const projectId = 'YOUR_PROJECT_ID';
/**
 * The region you want to the job to run in. The regions that support Batch are listed here:
 * https://cloud.google.com/batch/docs/get-started#locations
 */
// const region = 'us-central-1';
/**
 * The name of the job that will be created.
 * It needs to be unique for each project and region pair.
 */
// const jobName = 'YOUR_JOB_NAME';
/**
 * The name of the bucket to be mounted.
 */
// const bucketName = 'YOUR_BUCKET_NAME';

// Imports the Batch library
const batchLib = require('@google-cloud/batch');
const batch = batchLib.protos.google.cloud.batch.v1;

// Instantiates a client
const batchClient = new batchLib.v1.BatchServiceClient();

// Define what will be done as part of the job.
const task = new batch.TaskSpec();
const runnable = new batch.Runnable();
runnable.script = new batch.Runnable.Script();
runnable.script.text =
  'echo Hello world from task ${BATCH_TASK_INDEX}. >> /mnt/share/output_task_${BATCH_TASK_INDEX}.txt';
// You can also run a script from a file. Just remember, that needs to be a script that's
// already on the VM that will be running the job. Using runnable.script.text and runnable.script.path is mutually
// exclusive.
// runnable.script.path = '/tmp/test.sh'
task.runnables = [runnable];

const gcsBucket = new batch.GCS();
gcsBucket.remotePath = bucketName;
const gcsVolume = new batch.Volume();
gcsVolume.gcs = gcsBucket;
gcsVolume.mountPath = '/mnt/share';
task.volumes = [gcsVolume];

// We can specify what resources are requested by each task.
const resources = new batch.ComputeResource();
resources.cpuMilli = 2000; // in milliseconds per cpu-second. This means the task requires 2 whole CPUs.
resources.memoryMib = 16;
task.computeResource = resources;

task.maxRetryCount = 2;
task.maxRunDuration = {seconds: 3600};

// Tasks are grouped inside a job using TaskGroups.
const group = new batch.TaskGroup();
group.taskCount = 4;
group.taskSpec = task;

// Policies are used to define on what kind of virtual machines the tasks will run on.
// In this case, we tell the system to use "e2-standard-4" machine type.
// Read more about machine types here: https://cloud.google.com/compute/docs/machine-types
const allocationPolicy = new batch.AllocationPolicy();
const policy = new batch.AllocationPolicy.InstancePolicy();
policy.machineType = 'e2-standard-4';
const instances = new batch.AllocationPolicy.InstancePolicyOrTemplate();
instances.policy = policy;
allocationPolicy.instances = [instances];

const job = new batch.Job();
job.name = jobName;
job.taskGroups = [group];
job.allocationPolicy = allocationPolicy;
job.labels = {env: 'testing', type: 'script'};
// We use Cloud Logging as it's an option available out of the box
job.logsPolicy = new batch.LogsPolicy();
job.logsPolicy.destination = batch.LogsPolicy.Destination.CLOUD_LOGGING;

// The job's parent is the project and region in which the job will run
const parent = `projects/${projectId}/locations/${region}`;

async function callCreateJob() {
  // Construct request
  const request = {
    parent,
    jobId: jobName,
    job,
  };

  // Run request
  const response = await batchClient.createJob(request);
  console.log(response);
}

callCreateJob();

Python

Per scoprire di più, consulta la documentazione di riferimento per le API Python in batch.

Per eseguire l'autenticazione in batch, configura le Credenziali predefinite dell'applicazione. Per saperne di più, consulta Configurare l'autenticazione per un ambiente di sviluppo locale.

from google.cloud import batch_v1

def create_script_job_with_bucket(project_id: str, region: str, job_name: str, bucket_name: str) -> batch_v1.Job:
    """
    This method shows how to create a sample Batch Job that will run
    a simple command on Cloud Compute instances.

    Args:
        project_id: project ID or project number of the Cloud project you want to use.
        region: name of the region you want to use to run the job. Regions that are
            available for Batch are listed on: https://cloud.google.com/batch/docs/get-started#locations
        job_name: the name of the job that will be created.
            It needs to be unique for each project and region pair.
        bucket_name: name of the bucket to be mounted for your Job.

    Returns:
        A job object representing the job created.
    """
    client = batch_v1.BatchServiceClient()

    # Define what will be done as part of the job.
    task = batch_v1.TaskSpec()
    runnable = batch_v1.Runnable()
    runnable.script = batch_v1.Runnable.Script()
    runnable.script.text = "echo Hello world from task ${BATCH_TASK_INDEX}. >> /mnt/share/output_task_${BATCH_TASK_INDEX}.txt"
    task.runnables = [runnable]

    gcs_bucket = batch_v1.GCS()
    gcs_bucket.remote_path = bucket_name
    gcs_volume = batch_v1.Volume()
    gcs_volume.gcs = gcs_bucket
    gcs_volume.mount_path = '/mnt/share'
    task.volumes = [gcs_volume]

    # We can specify what resources are requested by each task.
    resources = batch_v1.ComputeResource()
    resources.cpu_milli = 500  # in milliseconds per cpu-second. This means the task requires 50% of a single CPUs.
    resources.memory_mib = 16
    task.compute_resource = resources

    task.max_retry_count = 2
    task.max_run_duration = "3600s"

    # Tasks are grouped inside a job using TaskGroups.
    # Currently, it's possible to have only one task group.
    group = batch_v1.TaskGroup()
    group.task_count = 4
    group.task_spec = task

    # Policies are used to define on what kind of virtual machines the tasks will run on.
    # In this case, we tell the system to use "e2-standard-4" machine type.
    # Read more about machine types here: https://cloud.google.com/compute/docs/machine-types
    allocation_policy = batch_v1.AllocationPolicy()
    policy = batch_v1.AllocationPolicy.InstancePolicy()
    policy.machine_type = "e2-standard-4"
    instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
    instances.policy = policy
    allocation_policy.instances = [instances]

    job = batch_v1.Job()
    job.task_groups = [group]
    job.allocation_policy = allocation_policy
    job.labels = {"env": "testing", "type": "script", "mount": "bucket"}
    # We use Cloud Logging as it's an out of the box available option
    job.logs_policy = batch_v1.LogsPolicy()
    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING

    create_request = batch_v1.CreateJobRequest()
    create_request.job = job
    create_request.job_id = job_name
    # The job's parent is the region in which the job will run
    create_request.parent = f"projects/{project_id}/locations/{region}"

    return client.create_job(create_request)

Passaggi successivi

Per cercare e filtrare esempi di codice per altri prodotti Google Cloud, consulta il browser di esempio Google Cloud.