Anonymiser des données DICOM à l'aide de keeplist

Supprimez les informations personnelles des données DICOM à l'aide de keeplist.

En savoir plus

Pour obtenir une documentation détaillée incluant cet exemple de code, consultez les articles suivants :

Exemple de code

Go

Avant d'essayer cet exemple, suivez les instructions de configuration de Go dans le guide de démarrage rapide de l'API Cloud Healthcare à l'aide des bibliothèques clientes. Pour en savoir plus, consultez la documentation de référence de l'API Go de l'API Cloud Healthcare.

Pour vous authentifier auprès de l'API Cloud Healthcare, configurez les Identifiants par défaut de l'application. Pour en savoir plus, consultez Configurer l'authentification pour un environnement de développement local.

import (
	"context"
	"fmt"
	"io"
	"time"

	healthcare "google.golang.org/api/healthcare/v1"
)

// deidentifyDataset creates a new dataset containing de-identified data from the source dataset.
func deidentifyDataset(w io.Writer, projectID, location, sourceDatasetID, destinationDatasetID string) error {
	ctx := context.Background()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %w", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)

	req := &healthcare.DeidentifyDatasetRequest{
		DestinationDataset: fmt.Sprintf("%s/datasets/%s", parent, destinationDatasetID),
		Config: &healthcare.DeidentifyConfig{
			Dicom: &healthcare.DicomConfig{
				KeepList: &healthcare.TagFilterList{
					Tags: []string{
						"PatientID",
					},
				},
			},
		},
	}

	sourceName := fmt.Sprintf("%s/datasets/%s", parent, sourceDatasetID)
	resp, err := datasetsService.Deidentify(sourceName, req).Do()
	if err != nil {
		return fmt.Errorf("Deidentify: %w", err)
	}

	// Wait for the deidentification operation to finish.
	operationService := healthcareService.Projects.Locations.Datasets.Operations
	for {
		op, err := operationService.Get(resp.Name).Do()
		if err != nil {
			return fmt.Errorf("operationService.Get: %w", err)
		}
		if !op.Done {
			time.Sleep(1 * time.Second)
			continue
		}
		if op.Error != nil {
			return fmt.Errorf("deidentify operation error: %v", *op.Error)
		}
		fmt.Fprintf(w, "Created de-identified dataset %s from %s\n", resp.Name, sourceName)
		return nil
	}
}

Java

Avant d'essayer cet exemple, suivez les instructions de configuration de Java dans le guide de démarrage rapide de l'API Cloud Healthcare à l'aide des bibliothèques clientes. Pour en savoir plus, consultez la documentation de référence de l'API Java de l'API Cloud Healthcare.

Pour vous authentifier auprès de l'API Cloud Healthcare, configurez les Identifiants par défaut de l'application. Pour en savoir plus, consultez Configurer l'authentification pour un environnement de développement local.

import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.DeidentifyConfig;
import com.google.api.services.healthcare.v1.model.DeidentifyDatasetRequest;
import com.google.api.services.healthcare.v1.model.DicomConfig;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.api.services.healthcare.v1.model.TagFilterList;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

public class DatasetDeIdentify {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new GsonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  public static void datasetDeIdentify(String srcDatasetName, String destDatasetName)
      throws IOException {
    // String srcDatasetName =
    //     String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-src-dataset-id");
    // String destDatasetName =
    //    String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-dest-dataset-id");

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure what information needs to be De-Identified.
    // For more information on de-identifying using tags, please see the following:
    // https://cloud.google.com/healthcare/docs/how-tos/dicom-deidentify#de-identification_using_tags
    TagFilterList tags = new TagFilterList().setTags(Arrays.asList("PatientID"));
    DicomConfig dicomConfig = new DicomConfig().setKeepList(tags);
    DeidentifyConfig config = new DeidentifyConfig().setDicom(dicomConfig);

    // Create the de-identify request and configure any parameters.
    DeidentifyDatasetRequest deidentifyRequest =
        new DeidentifyDatasetRequest().setDestinationDataset(destDatasetName).setConfig(config);
    Datasets.Deidentify request =
        client.projects().locations().datasets().deidentify(srcDatasetName, deidentifyRequest);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }
      System.out.println(
          "De-identified Dataset created. Response content: " + operation.getResponse());
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}

Node.js

Avant d'essayer cet exemple, suivez les instructions de configuration de Node.js dans le guide de démarrage rapide de l'API Cloud Healthcare à l'aide des bibliothèques clientes. Pour en savoir plus, consultez la documentation de référence de l'API Node.js de l'API Cloud Healthcare.

Pour vous authentifier auprès de l'API Cloud Healthcare, configurez les Identifiants par défaut de l'application. Pour en savoir plus, consultez Configurer l'authentification pour un environnement de développement local.

const google = require('@googleapis/healthcare');
const healthcare = google.healthcare({
  version: 'v1',
  auth: new google.auth.GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  }),
});

const deidentifyDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const sourceDatasetId = 'my-source-dataset';
  // const destinationDatasetId = 'my-destination-dataset';
  // const keeplistTags = 'PatientID'
  const sourceDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${sourceDatasetId}`;
  const destinationDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${destinationDatasetId}`;
  const request = {
    sourceDataset: sourceDataset,
    destinationDataset: destinationDataset,
    resource: {
      config: {
        dicom: {
          keepList: {
            tags: [keeplistTags],
          },
        },
      },
    },
  };

  await healthcare.projects.locations.datasets.deidentify(request);
  console.log(
    `De-identified data written from dataset ${sourceDatasetId} to dataset ${destinationDatasetId}`
  );
};

deidentifyDataset();

Python

Avant d'essayer cet exemple, suivez les instructions de configuration de Python dans le guide de démarrage rapide de l'API Cloud Healthcare à l'aide des bibliothèques clientes. Pour en savoir plus, consultez la documentation de référence de l'API Python de l'API Cloud Healthcare.

Pour vous authentifier auprès de l'API Cloud Healthcare, configurez les Identifiants par défaut de l'application. Pour en savoir plus, consultez Configurer l'authentification pour un environnement de développement local.

# Imports the Dict type for runtime type hints.
from typing import Dict

def deidentify_dataset(
    project_id: str,
    location: str,
    dataset_id: str,
    destination_dataset_id: str,
) -> Dict[str, str]:
    """Uses a DICOM tag keeplist to create a new dataset containing de-identified DICOM data from the source dataset.

    See
    https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/healthcare/api-client/v1/datasets
    before running the sample.
    See https://googleapis.github.io/google-api-python-client/docs/dyn/healthcare_v1.projects.locations.datasets.html#deidentify
    for the Python API reference.

    Args:
      project_id: The project ID or project number of the Google Cloud project you want
          to use.
      location: The name of the dataset's location.
      dataset_id: The ID of the source dataset containing the DICOM store to de-identify.
      destination_dataset_id: The ID of the dataset where de-identified DICOM data
        is written.

    Returns:
      A dictionary representing a long-running operation that results from
      calling the 'DeidentifyDataset' method. Use the
      'google.longrunning.Operation'
      API to poll the operation status.
    """
    # Imports the Python built-in time module.
    import time

    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    # Imports HttpError from the Google Python API client errors module.
    from googleapiclient.errors import HttpError

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'
    # location = 'us-central1'
    # dataset_id = 'my-source-dataset'
    # destination_dataset_id = 'my-destination-dataset'
    source_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, dataset_id
    )
    destination_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, destination_dataset_id
    )

    body = {
        "destinationDataset": destination_dataset,
        "config": {
            "dicom": {
                "keepList": {
                    "tags": [
                        "Columns",
                        "NumberOfFrames",
                        "PixelRepresentation",
                        "MediaStorageSOPClassUID",
                        "MediaStorageSOPInstanceUID",
                        "Rows",
                        "SamplesPerPixel",
                        "BitsAllocated",
                        "HighBit",
                        "PhotometricInterpretation",
                        "BitsStored",
                        "PatientID",
                        "TransferSyntaxUID",
                        "SOPInstanceUID",
                        "StudyInstanceUID",
                        "SeriesInstanceUID",
                        "PixelData",
                    ]
                }
            }
        },
    }

    request = (
        client.projects()
        .locations()
        .datasets()
        .deidentify(sourceDataset=source_dataset, body=body)
    )

    # Set a start time for operation completion.
    start_time = time.time()
    # TODO(developer): Increase the max_time if de-identifying many resources.
    max_time = 600

    try:
        operation = request.execute()
        while not operation.get("done", False):
            # Poll until the operation finishes.
            print("Waiting for operation to finish...")
            if time.time() - start_time > max_time:
                raise RuntimeError("Timed out waiting for operation to finish.")
            operation = (
                client.projects()
                .locations()
                .datasets()
                .operations()
                .get(name=operation["name"])
                .execute()
            )
            # Wait 5 seconds between each poll to the operation.
            time.sleep(5)

        if operation.get("error"):
            raise TimeoutError(f"De-identify operation failed: {operation['error']}")
        else:
            print(f"De-identified data to dataset: {destination_dataset_id}")
            print(
                f"Resources succeeded: {operation.get('metadata').get('counter').get('success')}"
            )
            print(
                f"Resources failed: {operation.get('metadata').get('counter').get('failure')}"
            )
            return operation

    except HttpError as err:
        # A common error is when the destination dataset already exists.
        if err.resp.status == 409:
            raise RuntimeError(
                f"Destination dataset with ID {destination_dataset_id} already exists."
            )
        else:
            raise err

Étapes suivantes

Pour rechercher et filtrer des exemples de code pour d'autres produits Google Cloud, consultez l'exemple de navigateur Google Cloud.