Create dataset

Create a dataset.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the Cloud Healthcare API quickstart using client libraries. For more information, see the Cloud Healthcare API Go API reference documentation.

To authenticate to Cloud Healthcare API, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"
	"time"

	healthcare "google.golang.org/api/healthcare/v1"
)

// createDataset creates a dataset.
func createDataset(w io.Writer, projectID, location, datasetID string) error {
	// Set a deadline for the dataset to become initialized.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %w", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)

	resp, err := datasetsService.Create(parent, &healthcare.Dataset{}).DatasetId(datasetID).Context(ctx).Do()
	if err != nil {
		return fmt.Errorf("Create: %w", err)
	}

	// The dataset is not always ready to use immediately, instead a long-running operation is returned.
	// This is how you might poll the operation to ensure the dataset is fully initialized before proceeding.
	// Initialization usually takes less than a minute.
	for !resp.Done {
		time.Sleep(15 * time.Second)
		resp, err = datasetsService.Operations.Get(resp.Name).Context(ctx).Do()
		if err != nil {
			return fmt.Errorf("Operations.Get(%s): %w", resp.Name, err)
		}
	}

	fmt.Fprintf(w, "Created dataset: %q\n", resp.Name)
	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the Cloud Healthcare API quickstart using client libraries. For more information, see the Cloud Healthcare API Java API reference documentation.

To authenticate to Cloud Healthcare API, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.Dataset;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Collections;

public class DatasetCreate {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new GsonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  public static void datasetCreate(String projectId, String regionId, String datasetId)
      throws IOException {
    // String projectId = "your-project-id";
    // String regionId = "us-central1";
    // String datasetId = "your-dataset-id";

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure the dataset to be created.
    Dataset dataset = new Dataset();
    dataset.setTimeZone("America/Chicago");

    // Create request and configure any parameters.
    String parentName = String.format("projects/%s/locations/%s", projectId, regionId);
    Datasets.Create request = client.projects().locations().datasets().create(parentName, dataset);
    request.setDatasetId(datasetId);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      System.out.println(operation.toPrettyString());
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }
      System.out.println("Dataset created. Response content: " + operation.getResponse());
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s\n", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the Cloud Healthcare API quickstart using client libraries. For more information, see the Cloud Healthcare API Node.js API reference documentation.

To authenticate to Cloud Healthcare API, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

const google = require('@googleapis/healthcare');
const healthcare = google.healthcare({
  version: 'v1',
  auth: new google.auth.GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  }),
});

const createDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const datasetId = 'my-dataset';
  const parent = `projects/${projectId}/locations/${cloudRegion}`;
  const request = {parent, datasetId};

  await healthcare.projects.locations.datasets.create(request);
  console.log(`Created dataset: ${datasetId}`);
};

createDataset();

Python

Before trying this sample, follow the Python setup instructions in the Cloud Healthcare API quickstart using client libraries. For more information, see the Cloud Healthcare API Python API reference documentation.

To authenticate to Cloud Healthcare API, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

# Imports the Dict type for runtime type hints.
from typing import Dict


def create_dataset(project_id: str, location: str, dataset_id: str) -> Dict[str, str]:
    """Creates a Cloud Healthcare API dataset.

    See
    https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/healthcare/api-client/v1/datasets
    before running the sample.
    See
    https://googleapis.github.io/google-api-python-client/docs/dyn/healthcare_v1.projects.locations.datasets.html#create
    for the Python API reference.

    Args:
      project_id: The project ID or project number of the Google Cloud project you want
          to use.
      location: The name of the dataset's location.
      dataset_id: The ID of the dataset to create.

    Returns:
      A dictionary representing a long-running operation that results from
      calling the 'CreateDataset' method. Dataset creation is typically fast.
    """
    # Imports the Python built-in time module.
    import time

    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    # Imports HttpError from the Google Python API client errors module.
    from googleapiclient.errors import HttpError

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'
    # location = 'us-central1'
    # dataset_id = 'my-dataset'
    dataset_parent = f"projects/{project_id}/locations/{location}"

    request = (
        client.projects()
        .locations()
        .datasets()
        .create(parent=dataset_parent, body={}, datasetId=dataset_id)
    )

    # Wait for operation to complete.
    start_time = time.time()
    max_time = 600  # 10 minutes, but dataset creation is typically only a few seconds.

    try:
        operation = request.execute()
        while not operation.get("done", False):
            # Poll until the operation finishes.
            print("Waiting for operation to finish...")
            if time.time() - start_time > max_time:
                raise TimeoutError("Timed out waiting for operation to finish.")
            operation = (
                client.projects()
                .locations()
                .datasets()
                .operations()
                .get(name=operation["name"])
                .execute()
            )
            # Wait 5 seconds between each poll to the operation.
            time.sleep(5)

        if "error" in operation:
            raise RuntimeError(f"Create dataset operation failed: {operation['error']}")
        else:
            dataset_name = operation["response"]["name"]
            print(f"Created dataset: {dataset_name}")
            return operation

    except HttpError as err:
        # A common error is when the dataset already exists.
        if err.resp.status == 409:
            print(f"Dataset with ID {dataset_id} already exists.")
            return
        else:
            raise err

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.