Desidentificar datos de DICOM con keeplist

Quita la PII de los datos de DICOM mediante una keeplist.

Páginas de documentación que incluyen esta muestra de código

Para ver la muestra de código usada en contexto, consulta la siguiente documentación:

Muestra de código

Go

import (
	"context"
	"fmt"
	"io"
	"time"

	healthcare "google.golang.org/api/healthcare/v1"
)

// deidentifyDataset creates a new dataset containing de-identified data from the source dataset.
func deidentifyDataset(w io.Writer, projectID, location, sourceDatasetID, destinationDatasetID string) error {
	ctx := context.Background()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %v", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)

	req := &healthcare.DeidentifyDatasetRequest{
		DestinationDataset: fmt.Sprintf("%s/datasets/%s", parent, destinationDatasetID),
		Config: &healthcare.DeidentifyConfig{
			Dicom: &healthcare.DicomConfig{
				KeepList: &healthcare.TagFilterList{
					Tags: []string{
						"PatientID",
					},
				},
			},
		},
	}

	sourceName := fmt.Sprintf("%s/datasets/%s", parent, sourceDatasetID)
	resp, err := datasetsService.Deidentify(sourceName, req).Do()
	if err != nil {
		return fmt.Errorf("Deidentify: %v", err)
	}

	// Wait for the deidentification operation to finish.
	operationService := healthcareService.Projects.Locations.Datasets.Operations
	for {
		op, err := operationService.Get(resp.Name).Do()
		if err != nil {
			return fmt.Errorf("operationService.Get: %v", err)
		}
		if !op.Done {
			time.Sleep(1 * time.Second)
			continue
		}
		if op.Error != nil {
			return fmt.Errorf("deidentify operation error: %v", *op.Error)
		}
		fmt.Fprintf(w, "Created de-identified dataset %s from %s\n", resp.Name, sourceName)
		return nil
	}
}

Java

import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.DeidentifyConfig;
import com.google.api.services.healthcare.v1.model.DeidentifyDatasetRequest;
import com.google.api.services.healthcare.v1.model.DicomConfig;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.api.services.healthcare.v1.model.TagFilterList;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

public class DatasetDeIdentify {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new JacksonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  public static void datasetDeIdentify(String srcDatasetName, String destDatasetName)
      throws IOException {
    // String srcDatasetName =
    //     String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-src-dataset-id");
    // String destDatasetName =
    //    String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-dest-dataset-id");

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure what information needs to be De-Identified.
    // For more information on de-identifying using tags, please see the following:
    // https://cloud.google.com/healthcare/docs/how-tos/dicom-deidentify#de-identification_using_tags
    TagFilterList tags = new TagFilterList().setTags(Arrays.asList("PatientID"));
    DicomConfig dicomConfig = new DicomConfig().setKeepList(tags);
    DeidentifyConfig config = new DeidentifyConfig().setDicom(dicomConfig);

    // Create the de-identify request and configure any parameters.
    DeidentifyDatasetRequest deidentifyRequest =
        new DeidentifyDatasetRequest().setDestinationDataset(destDatasetName).setConfig(config);
    Datasets.Deidentify request =
        client.projects().locations().datasets().deidentify(srcDatasetName, deidentifyRequest);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }
      System.out.println(
          "De-identified Dataset created. Response content: " + operation.getResponse());
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}

Node.js

const google = require('@googleapis/healthcare');
const healthcare = google.healthcare({
  version: 'v1',
  auth: new google.auth.GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  }),
});

const deidentifyDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const sourceDatasetId = 'my-source-dataset';
  // const destinationDatasetId = 'my-destination-dataset';
  // const keeplistTags = 'PatientID'
  const sourceDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${sourceDatasetId}`;
  const destinationDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${destinationDatasetId}`;
  const request = {
    sourceDataset: sourceDataset,
    destinationDataset: destinationDataset,
    resource: {
      config: {
        dicom: {
          keepList: {
            tags: [keeplistTags],
          },
        },
      },
    },
  };

  await healthcare.projects.locations.datasets.deidentify(request);
  console.log(
    `De-identified data written from dataset ${sourceDatasetId} to dataset ${destinationDatasetId}`
  );
};

deidentifyDataset();

Python

def deidentify_dataset(project_id, location, dataset_id, destination_dataset_id):
    """Uses a DICOM tag keeplist to create a new dataset containing
    de-identified DICOM data from the source dataset.

    See https://github.com/GoogleCloudPlatform/python-docs-samples/tree/master/healthcare/api-client/v1/datasets
    before running the sample."""
    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'  # replace with your GCP project ID
    # location = 'us-central1'  # replace with the dataset's location
    # dataset_id = 'my-source-dataset'  # replace with the source dataset's ID
    # destination_dataset_id = 'my-destination-dataset'  # replace with the destination dataset's ID
    source_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, dataset_id
    )
    destination_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, destination_dataset_id
    )

    body = {
        "destinationDataset": destination_dataset,
        "config": {
            "dicom": {
                "keepList": {
                    "tags": [
                        "Columns",
                        "NumberOfFrames",
                        "PixelRepresentation",
                        "MediaStorageSOPClassUID",
                        "MediaStorageSOPInstanceUID",
                        "Rows",
                        "SamplesPerPixel",
                        "BitsAllocated",
                        "HighBit",
                        "PhotometricInterpretation",
                        "BitsStored",
                        "PatientID",
                        "TransferSyntaxUID",
                        "SOPInstanceUID",
                        "StudyInstanceUID",
                        "SeriesInstanceUID",
                        "PixelData",
                    ]
                }
            }
        },
    }

    request = (
        client.projects()
        .locations()
        .datasets()
        .deidentify(sourceDataset=source_dataset, body=body)
    )

    response = request.execute()
    print(
        "Data in dataset {} de-identified."
        "De-identified data written to {}".format(dataset_id, destination_dataset_id)
    )
    return response

¿Qué sigue?

Para buscar y filtrar muestras de código para otros productos de Google Cloud, consulta el navegador de muestra de Google Cloud.