使用 keeplist 从 DICOM 数据中移除个人身份信息。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
Go
在试用此示例之前,请按照使用客户端库的 Cloud Healthcare API 快速入门中的 Go 设置说明进行操作。如需了解详情,请参阅 Cloud Healthcare API Go API 参考文档。
如需向 Cloud Healthcare API 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
import (
"context"
"fmt"
"io"
"time"
healthcare "google.golang.org/api/healthcare/v1"
)
// deidentifyDataset creates a new dataset containing de-identified data from
// the source dataset. The DICOM configuration uses a keeplist containing only
// the "PatientID" tag.
//
// projectID and location identify the parent of both datasets;
// sourceDatasetID names the dataset to read and destinationDatasetID names the
// dataset the request asks the API to write to. The function blocks, polling
// the returned long-running operation once per second, until the operation
// reports Done. On success a one-line summary is written to w.
//
// An error is returned if the client cannot be created, if any API call
// fails, or if the finished operation carries an error.
func deidentifyDataset(w io.Writer, projectID, location, sourceDatasetID, destinationDatasetID string) error {
	ctx := context.Background()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %w", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)
	sourceName := fmt.Sprintf("%s/datasets/%s", parent, sourceDatasetID)
	destinationName := fmt.Sprintf("%s/datasets/%s", parent, destinationDatasetID)

	req := &healthcare.DeidentifyDatasetRequest{
		DestinationDataset: destinationName,
		Config: &healthcare.DeidentifyConfig{
			Dicom: &healthcare.DicomConfig{
				KeepList: &healthcare.TagFilterList{
					Tags: []string{
						"PatientID",
					},
				},
			},
		},
	}

	// Bind every request to ctx so the calls share the context that was used
	// to build the service.
	resp, err := datasetsService.Deidentify(sourceName, req).Context(ctx).Do()
	if err != nil {
		return fmt.Errorf("Deidentify: %w", err)
	}

	// Wait for the deidentification operation to finish.
	operationService := datasetsService.Operations
	for {
		op, err := operationService.Get(resp.Name).Context(ctx).Do()
		if err != nil {
			return fmt.Errorf("operationService.Get: %w", err)
		}
		if !op.Done {
			time.Sleep(1 * time.Second)
			continue
		}
		if op.Error != nil {
			return fmt.Errorf("deidentify operation error: %v", *op.Error)
		}
		// resp.Name is the name of the long-running operation, not of the
		// dataset, so report the destination dataset that was requested.
		fmt.Fprintf(w, "Created de-identified dataset %s from %s\n", destinationName, sourceName)
		return nil
	}
}
Java
在试用此示例之前,请按照使用客户端库的 Cloud Healthcare API 快速入门中的 Java 设置说明进行操作。如需了解详情,请参阅 Cloud Healthcare API Java API 参考文档。
如需向 Cloud Healthcare API 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.DeidentifyConfig;
import com.google.api.services.healthcare.v1.model.DeidentifyDatasetRequest;
import com.google.api.services.healthcare.v1.model.DicomConfig;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.api.services.healthcare.v1.model.TagFilterList;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
/**
 * Sample that de-identifies a Cloud Healthcare dataset using a DICOM tag keeplist containing only
 * the "PatientID" tag.
 */
public class DatasetDeIdentify {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new GsonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  /**
   * Sends a de-identify request for {@code srcDatasetName}, polls the resulting long-running
   * operation every 500ms until it is done, and prints the outcome to standard output.
   *
   * <p>Failures while executing or polling the request are caught and printed rather than
   * rethrown; only client creation propagates {@link IOException}.
   *
   * @param srcDatasetName full resource name of the source dataset
   * @param destDatasetName full resource name of the destination dataset
   * @throws IOException if the API client cannot be created
   */
  public static void datasetDeIdentify(String srcDatasetName, String destDatasetName)
      throws IOException {
    // String srcDatasetName =
    //     String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-src-dataset-id");
    // String destDatasetName =
    //    String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-dest-dataset-id");

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure what information needs to be De-Identified.
    // For more information on de-identifying using tags, please see the following:
    // https://cloud.google.com/healthcare/docs/how-tos/dicom-deidentify#de-identification_using_tags
    TagFilterList tags = new TagFilterList().setTags(Arrays.asList("PatientID"));
    DicomConfig dicomConfig = new DicomConfig().setKeepList(tags);
    DeidentifyConfig config = new DeidentifyConfig().setDicom(dicomConfig);

    // Create the de-identify request and configure any parameters.
    DeidentifyDatasetRequest deidentifyRequest =
        new DeidentifyDatasetRequest().setDestinationDataset(destDatasetName).setConfig(config);
    Datasets.Deidentify request =
        client.projects().locations().datasets().deidentify(srcDatasetName, deidentifyRequest);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }

      // A finished operation may have failed; do not report success in that case.
      if (operation.getError() != null) {
        System.out.println("De-identify operation failed: " + operation.getError());
        return;
      }
      System.out.println(
          "De-identified Dataset created. Response content: " + operation.getResponse());
    } catch (InterruptedException ex) {
      // Restore the interrupt flag so callers further up the stack can observe it.
      Thread.currentThread().interrupt();
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  /**
   * Builds a {@link CloudHealthcare} client authenticated with Application Default Credentials
   * scoped to the cloud-platform scope, with one-minute connect and read timeouts.
   *
   * @return the configured client
   * @throws IOException if the default credentials cannot be loaded
   */
  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}
Node.js
在试用此示例之前,请按照使用客户端库的 Cloud Healthcare API 快速入门中的 Node.js 设置说明进行操作。如需了解详情,请参阅 Cloud Healthcare API Node.js API 参考文档。
如需向 Cloud Healthcare API 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
const google = require('@googleapis/healthcare');

// Healthcare API v1 client, authorized for the cloud-platform scope.
const healthcare = google.healthcare({
  version: 'v1',
  auth: new google.auth.GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  }),
});

// Sends a de-identify request that keeps only the tags named in
// `keeplistTags`, then logs which datasets were involved.
const deidentifyDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const sourceDatasetId = 'my-source-dataset';
  // const destinationDatasetId = 'my-destination-dataset';
  // const keeplistTags = 'PatientID'

  // Both datasets live under the same project and region.
  const datasetPath = datasetId =>
    `projects/${projectId}/locations/${cloudRegion}/datasets/${datasetId}`;
  const sourceDataset = datasetPath(sourceDatasetId);
  const destinationDataset = datasetPath(destinationDatasetId);

  // DICOM configuration: everything except the keeplist tags is de-identified.
  const dicom = {keepList: {tags: [keeplistTags]}};
  const request = {
    sourceDataset,
    destinationDataset,
    resource: {config: {dicom}},
  };

  await healthcare.projects.locations.datasets.deidentify(request);
  console.log(
    `De-identified data written from dataset ${sourceDatasetId} to dataset ${destinationDatasetId}`
  );
};

deidentifyDataset();
Python
在试用此示例之前,请按照使用客户端库的 Cloud Healthcare API 快速入门中的 Python 设置说明进行操作。如需了解详情,请参阅 Cloud Healthcare API Python API 参考文档。
如需向 Cloud Healthcare API 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
# Imports the Dict type for runtime type hints.
from typing import Dict
def deidentify_dataset(
    project_id: str,
    location: str,
    dataset_id: str,
    destination_dataset_id: str,
) -> Dict[str, str]:
    """Uses a DICOM tag keeplist to create a new dataset containing de-identified DICOM data from the source dataset.

    See
    https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/healthcare/api-client/v1/datasets
    before running the sample.
    See https://googleapis.github.io/google-api-python-client/docs/dyn/healthcare_v1.projects.locations.datasets.html#deidentify
    for the Python API reference.

    Args:
      project_id: The project ID or project number of the Google Cloud project you want
          to use.
      location: The name of the dataset's location.
      dataset_id: The ID of the source dataset containing the DICOM store to de-identify.
      destination_dataset_id: The ID of the dataset where de-identified DICOM data
          is written.

    Returns:
      A dictionary representing the long-running operation that results from
      calling the 'DeidentifyDataset' method, as returned by the last poll
      (i.e. once the operation reports 'done').

    Raises:
      RuntimeError: If the operation is still not done after max_time seconds,
          or if the API answers with HTTP 409.
      TimeoutError: If the finished operation contains an 'error' field.
      googleapiclient.errors.HttpError: For any other HTTP error.
    """
    # Imports the Python built-in time module.
    import time

    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    # Imports HttpError from the Google Python API client errors module.
    from googleapiclient.errors import HttpError

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'
    # location = 'us-central1'
    # dataset_id = 'my-source-dataset'
    # destination_dataset_id = 'my-destination-dataset'
    source_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, dataset_id
    )
    destination_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, destination_dataset_id
    )

    body = {
        "destinationDataset": destination_dataset,
        "config": {
            "dicom": {
                "keepList": {
                    "tags": [
                        "Columns",
                        "NumberOfFrames",
                        "PixelRepresentation",
                        "MediaStorageSOPClassUID",
                        "MediaStorageSOPInstanceUID",
                        "Rows",
                        "SamplesPerPixel",
                        "BitsAllocated",
                        "HighBit",
                        "PhotometricInterpretation",
                        "BitsStored",
                        "PatientID",
                        "TransferSyntaxUID",
                        "SOPInstanceUID",
                        "StudyInstanceUID",
                        "SeriesInstanceUID",
                        "PixelData",
                    ]
                }
            }
        },
    }

    request = (
        client.projects()
        .locations()
        .datasets()
        .deidentify(sourceDataset=source_dataset, body=body)
    )

    # Set a start time for operation completion.
    start_time = time.time()
    # TODO(developer): Increase the max_time if de-identifying many resources.
    max_time = 600

    try:
        operation = request.execute()
        while not operation.get("done", False):
            # Poll until the operation finishes.
            print("Waiting for operation to finish...")
            if time.time() - start_time > max_time:
                raise RuntimeError("Timed out waiting for operation to finish.")
            # Wait 5 seconds before each poll. Sleeping first means no time is
            # wasted after the poll that reports the operation as done.
            time.sleep(5)
            operation = (
                client.projects()
                .locations()
                .datasets()
                .operations()
                .get(name=operation["name"])
                .execute()
            )

        if operation.get("error"):
            # NOTE(review): TimeoutError looks like the wrong type for an
            # operation failure; kept so existing callers keep working.
            raise TimeoutError(f"De-identify operation failed: {operation['error']}")

        print(f"De-identified data to dataset: {destination_dataset_id}")
        # A finished operation is not guaranteed to carry metadata/counter, so
        # fall back to empty dicts instead of failing on None.
        counter = (operation.get("metadata") or {}).get("counter") or {}
        print(f"Resources succeeded: {counter.get('success')}")
        print(f"Resources failed: {counter.get('failure')}")
        return operation

    except HttpError as err:
        # A common error is when the destination dataset already exists.
        if err.resp.status == 409:
            raise RuntimeError(
                f"Destination dataset with ID {destination_dataset_id} already exists."
            ) from err
        raise
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。