Create cluster

Creates a Dataproc cluster.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code sample


Before trying this sample, follow the Go setup instructions in the Dataproc Quickstart Using Client Libraries. For more information, see the Dataproc Go API reference documentation.

import (

	dataproc ""
	dataprocpb ""

func createCluster(w io.Writer, projectID, region, clusterName string) error {
	// projectID := "your-project-id"
	// region := "us-central1"
	// clusterName := "your-cluster"
	ctx := context.Background()

	// Create the cluster client.
	endpoint := region + ""
	clusterClient, err := dataproc.NewClusterControllerClient(ctx, option.WithEndpoint(endpoint))
	if err != nil {
		return fmt.Errorf("dataproc.NewClusterControllerClient: %v", err)
	defer clusterClient.Close()

	// Create the cluster config.
	req := &dataprocpb.CreateClusterRequest{
		ProjectId: projectID,
		Region:    region,
		Cluster: &dataprocpb.Cluster{
			ProjectId:   projectID,
			ClusterName: clusterName,
			Config: &dataprocpb.ClusterConfig{
				MasterConfig: &dataprocpb.InstanceGroupConfig{
					NumInstances:   1,
					MachineTypeUri: "n1-standard-2",
				WorkerConfig: &dataprocpb.InstanceGroupConfig{
					NumInstances:   2,
					MachineTypeUri: "n1-standard-2",

	// Create the cluster.
	op, err := clusterClient.CreateCluster(ctx, req)
	if err != nil {
		return fmt.Errorf("CreateCluster: %v", err)

	resp, err := op.Wait(ctx)
	if err != nil {
		return fmt.Errorf("CreateCluster.Wait: %v", err)

	// Output a success message.
	fmt.Fprintf(w, "Cluster created successfully: %s", resp.ClusterName)
	return nil


Before trying this sample, follow the Java setup instructions in the Dataproc Quickstart Using Client Libraries. For more information, see the Dataproc Java API reference documentation.

import java.util.concurrent.ExecutionException;

public class CreateCluster {

  public static void createCluster() throws IOException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String region = "your-project-region";
    String clusterName = "your-cluster-name";
    createCluster(projectId, region, clusterName);

  public static void createCluster(String projectId, String region, String clusterName)
      throws IOException, InterruptedException {
    String myEndpoint = String.format("", region);

    // Configure the settings for the cluster controller client.
    ClusterControllerSettings clusterControllerSettings =

    // Create a cluster controller client with the configured settings. The client only needs to be
    // created once and can be reused for multiple requests. Using a try-with-resources
    // closes the client, but this can also be done manually with the .close() method.
    try (ClusterControllerClient clusterControllerClient =
        ClusterControllerClient.create(clusterControllerSettings)) {
      // Configure the settings for our cluster.
      InstanceGroupConfig masterConfig =
      InstanceGroupConfig workerConfig =
      ClusterConfig clusterConfig =
      // Create the cluster object with the desired cluster config.
      Cluster cluster =

      // Create the Cloud Dataproc cluster.
      OperationFuture<Cluster, ClusterOperationMetadata> createClusterAsyncRequest =
          clusterControllerClient.createClusterAsync(projectId, region, cluster);
      Cluster response = createClusterAsyncRequest.get();

      // Print out a success message.
      System.out.printf("Cluster created successfully: %s", response.getClusterName());

    } catch (ExecutionException e) {
      System.err.println(String.format("Error executing createCluster: %s ", e.getMessage()));


Before trying this sample, follow the Node.js setup instructions in the Dataproc Quickstart Using Client Libraries. For more information, see the Dataproc Node.js API reference documentation.

const dataproc = require('@google-cloud/dataproc');

// TODO(developer): Uncomment and set the following variables
// projectId = 'YOUR_PROJECT_ID'
// clusterName = 'YOUR_CLUSTER_NAME'

// Create a client with the endpoint set to the desired cluster region
const client = new dataproc.v1.ClusterControllerClient({
  apiEndpoint: `${region}`,
  projectId: projectId,

async function createCluster() {
  // Create the cluster config
  const request = {
    projectId: projectId,
    region: region,
    cluster: {
      clusterName: clusterName,
      config: {
        masterConfig: {
          numInstances: 1,
          machineTypeUri: 'n1-standard-2',
        workerConfig: {
          numInstances: 2,
          machineTypeUri: 'n1-standard-2',

  // Create the cluster
  const [operation] = await client.createCluster(request);
  const [response] = await operation.promise();

  // Output a success message
  console.log(`Cluster created successfully: ${response.clusterName}`);


Before trying this sample, follow the Python setup instructions in the Dataproc Quickstart Using Client Libraries. For more information, see the Dataproc Python API reference documentation.

from import dataproc_v1 as dataproc

def create_cluster(project_id, region, cluster_name):
    """This sample walks a user through creating a Cloud Dataproc cluster
       using the Python client library.

           project_id (string): Project to use for creating resources.
           region (string): Region where the resources should live.
           cluster_name (string): Name to use for creating a cluster.

    # Create a client with the endpoint set to the desired cluster region.
    cluster_client = dataproc.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}"}

    # Create the cluster config.
    cluster = {
        "project_id": project_id,
        "cluster_name": cluster_name,
        "config": {
            "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
            "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},

    # Create the cluster.
    operation = cluster_client.create_cluster(
        request={"project_id": project_id, "region": region, "cluster": cluster}
    result = operation.result()

    # Output a success message.
    print(f"Cluster created successfully: {result.cluster_name}")

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser