Remove PII from DICOM data by using a keeplist.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
Go
import (
"context"
"fmt"
"io"
"time"
healthcare "google.golang.org/api/healthcare/v1"
)
// deidentifyDataset creates a new dataset containing de-identified data from
// the source dataset, using a DICOM tag keeplist that contains PatientID.
//
// It starts the de-identify operation on the source dataset, polls the
// operation once per second until it reports done, and then writes a
// confirmation line to w. It returns an error if the client cannot be
// created, if any API call fails, or if the finished operation carries an
// error status.
func deidentifyDataset(w io.Writer, projectID, location, sourceDatasetID, destinationDatasetID string) error {
	ctx := context.Background()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %w", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	// Both datasets live under the same project/location parent.
	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)
	sourceName := fmt.Sprintf("%s/datasets/%s", parent, sourceDatasetID)
	destinationName := fmt.Sprintf("%s/datasets/%s", parent, destinationDatasetID)

	req := &healthcare.DeidentifyDatasetRequest{
		DestinationDataset: destinationName,
		Config: &healthcare.DeidentifyConfig{
			Dicom: &healthcare.DicomConfig{
				KeepList: &healthcare.TagFilterList{
					Tags: []string{
						"PatientID",
					},
				},
			},
		},
	}

	resp, err := datasetsService.Deidentify(sourceName, req).Context(ctx).Do()
	if err != nil {
		return fmt.Errorf("Deidentify: %w", err)
	}

	// Wait for the deidentification operation to finish.
	operationService := healthcareService.Projects.Locations.Datasets.Operations
	for {
		op, err := operationService.Get(resp.Name).Context(ctx).Do()
		if err != nil {
			return fmt.Errorf("operationService.Get: %w", err)
		}
		if !op.Done {
			time.Sleep(1 * time.Second)
			continue
		}
		if op.Error != nil {
			return fmt.Errorf("deidentify operation error: %v", *op.Error)
		}
		// resp.Name is the name of the operation, not of the dataset, so
		// report the destination dataset that was requested.
		fmt.Fprintf(w, "Created de-identified dataset %s from %s\n", destinationName, sourceName)
		return nil
	}
}
Java
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.DeidentifyConfig;
import com.google.api.services.healthcare.v1.model.DeidentifyDatasetRequest;
import com.google.api.services.healthcare.v1.model.DicomConfig;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.api.services.healthcare.v1.model.TagFilterList;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
/**
 * Sample that de-identifies a dataset by keeping only the DICOM tags on a keeplist
 * ("PatientID") and writing the result to a destination dataset.
 */
public class DatasetDeIdentify {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new JacksonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  /**
   * Starts a de-identify operation and polls it every 500ms until it is done, then prints the
   * outcome to standard output.
   *
   * @param srcDatasetName full resource name of the source dataset
   * @param destDatasetName full resource name of the dataset to create
   * @throws IOException if the client cannot be created or the request cannot be built
   */
  public static void datasetDeIdentify(String srcDatasetName, String destDatasetName)
      throws IOException {
    // String srcDatasetName =
    //     String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-src-dataset-id");
    // String destDatasetName =
    //    String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-dest-dataset-id");

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure what information needs to be De-Identified.
    // For more information on de-identifying using tags, please see the following:
    // https://cloud.google.com/healthcare/docs/how-tos/dicom-deidentify#de-identification_using_tags
    TagFilterList tags = new TagFilterList().setTags(Arrays.asList("PatientID"));
    DicomConfig dicomConfig = new DicomConfig().setKeepList(tags);
    DeidentifyConfig config = new DeidentifyConfig().setDicom(dicomConfig);

    // Create the de-identify request and configure any parameters.
    DeidentifyDatasetRequest deidentifyRequest =
        new DeidentifyDatasetRequest().setDestinationDataset(destDatasetName).setConfig(config);
    Datasets.Deidentify request =
        client.projects().locations().datasets().deidentify(srcDatasetName, deidentifyRequest);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }

      // A finished operation may have failed; do not report it as a success.
      if (operation.getError() != null) {
        System.out.println("De-identify operation failed: " + operation.getError());
        return;
      }

      System.out.println(
          "De-identified Dataset created. Response content: " + operation.getResponse());
    } catch (InterruptedException ex) {
      // Restore the interrupt flag so callers can observe the interruption.
      Thread.currentThread().interrupt();
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  /**
   * Builds a CloudHealthcare client whose requests are authorized with scoped application
   * default credentials and use one-minute connect and read timeouts.
   *
   * @return the configured client
   * @throws IOException if the application default credentials cannot be loaded
   */
  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}
Node.js
const google = require('@googleapis/healthcare');

// Auth client that requests the cloud-platform scope.
const auth = new google.auth.GoogleAuth({
  scopes: ['https://www.googleapis.com/auth/cloud-platform'],
});

// Healthcare API v1 client shared by the sample below.
const healthcare = google.healthcare({version: 'v1', auth: auth});
/**
 * Requests de-identification of the source dataset into the destination
 * dataset, keeping the DICOM tags named by `keeplistTags`, then logs a
 * confirmation line.
 *
 * Relies on `projectId`, `cloudRegion`, `sourceDatasetId`,
 * `destinationDatasetId` and `keeplistTags` being defined (see the TODO).
 */
const deidentifyDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const sourceDatasetId = 'my-source-dataset';
  // const destinationDatasetId = 'my-destination-dataset';
  // const keeplistTags = 'PatientID'
  const sourceDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${sourceDatasetId}`;
  const destinationDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${destinationDatasetId}`;

  const request = {
    // Path parameter: the dataset to read from.
    sourceDataset: sourceDataset,
    // Request body: destinationDataset is a field of the de-identify request
    // body, so it belongs here next to config rather than beside the path
    // parameter.
    resource: {
      destinationDataset: destinationDataset,
      config: {
        dicom: {
          keepList: {
            tags: [keeplistTags],
          },
        },
      },
    },
  };

  // NOTE(review): the result of this call is not inspected; if it only
  // represents a started long-running operation, the message below may be
  // printed before the data has actually been written — confirm.
  await healthcare.projects.locations.datasets.deidentify(request);
  console.log(
    `De-identified data written from dataset ${sourceDatasetId} to dataset ${destinationDatasetId}`
  );
};

deidentifyDataset();
Python
def deidentify_dataset(project_id, location, dataset_id, destination_dataset_id):
    """Uses a DICOM tag keeplist to create a new dataset containing
    de-identified DICOM data from the source dataset.

    See https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/healthcare/api-client/v1/datasets
    before running the sample.

    Args:
        project_id: ID of the project that holds both datasets.
        location: Location of the datasets.
        dataset_id: ID of the source dataset.
        destination_dataset_id: ID of the dataset to write to.

    Returns:
        The value returned by executing the de-identify request.
    """
    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'  # replace with your GCP project ID
    # location = 'us-central1'  # replace with the dataset's location
    # dataset_id = 'my-source-dataset'  # replace with the source dataset's ID
    # destination_dataset_id = 'my-destination-dataset'  # replace with the destination dataset's ID
    source_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, dataset_id
    )
    destination_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, destination_dataset_id
    )

    body = {
        "destinationDataset": destination_dataset,
        "config": {
            "dicom": {
                "keepList": {
                    "tags": [
                        "Columns",
                        "NumberOfFrames",
                        "PixelRepresentation",
                        "MediaStorageSOPClassUID",
                        "MediaStorageSOPInstanceUID",
                        "Rows",
                        "SamplesPerPixel",
                        "BitsAllocated",
                        "HighBit",
                        "PhotometricInterpretation",
                        "BitsStored",
                        "PatientID",
                        "TransferSyntaxUID",
                        "SOPInstanceUID",
                        "StudyInstanceUID",
                        "SeriesInstanceUID",
                        "PixelData",
                    ]
                }
            }
        },
    }

    request = (
        client.projects()
        .locations()
        .datasets()
        .deidentify(sourceDataset=source_dataset, body=body)
    )

    response = request.execute()
    # The two literals are joined by implicit concatenation, so the separating
    # space must be part of the first one.
    print(
        "Data in dataset {} de-identified. "
        "De-identified data written to {}".format(dataset_id, destination_dataset_id)
    )
    return response
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.