keeplist를 사용하여 DICOM 데이터 익명화

keeplist를 사용하여 DICOM 데이터에서 PII를 삭제합니다.

더 살펴보기

이 코드 샘플이 포함된 자세한 문서는 다음을 참조하세요.

코드 샘플

Go

이 샘플을 사용해 보기 전에 Cloud Healthcare API 빠른 시작: 클라이언트 라이브러리 사용Go 설정 안내를 따르세요. 자세한 내용은 Cloud Healthcare API Go API 참고 문서를 확인하세요.

Cloud Healthcare API에 인증하려면 애플리케이션 기본 사용자 인증 정보를 설정합니다. 자세한 내용은 로컬 개발 환경의 인증 설정을 참조하세요.

import (
	"context"
	"fmt"
	"io"
	"time"

	healthcare "google.golang.org/api/healthcare/v1"
)

// deidentifyDataset creates a new dataset containing de-identified data from the source dataset.
func deidentifyDataset(w io.Writer, projectID, location, sourceDatasetID, destinationDatasetID string) error {
	ctx := context.Background()

	healthcareService, err := healthcare.NewService(ctx)
	if err != nil {
		return fmt.Errorf("healthcare.NewService: %w", err)
	}

	datasetsService := healthcareService.Projects.Locations.Datasets

	parent := fmt.Sprintf("projects/%s/locations/%s", projectID, location)

	req := &healthcare.DeidentifyDatasetRequest{
		DestinationDataset: fmt.Sprintf("%s/datasets/%s", parent, destinationDatasetID),
		Config: &healthcare.DeidentifyConfig{
			Dicom: &healthcare.DicomConfig{
				KeepList: &healthcare.TagFilterList{
					Tags: []string{
						"PatientID",
					},
				},
			},
		},
	}

	sourceName := fmt.Sprintf("%s/datasets/%s", parent, sourceDatasetID)
	resp, err := datasetsService.Deidentify(sourceName, req).Do()
	if err != nil {
		return fmt.Errorf("Deidentify: %w", err)
	}

	// Wait for the deidentification operation to finish.
	operationService := healthcareService.Projects.Locations.Datasets.Operations
	for {
		op, err := operationService.Get(resp.Name).Do()
		if err != nil {
			return fmt.Errorf("operationService.Get: %w", err)
		}
		if !op.Done {
			time.Sleep(1 * time.Second)
			continue
		}
		if op.Error != nil {
			return fmt.Errorf("deidentify operation error: %v", *op.Error)
		}
		fmt.Fprintf(w, "Created de-identified dataset %s from %s\n", resp.Name, sourceName)
		return nil
	}
}

Java

이 샘플을 사용해 보기 전에 Cloud Healthcare API 빠른 시작: 클라이언트 라이브러리 사용Java 설정 안내를 따르세요. 자세한 내용은 Cloud Healthcare API Java API 참고 문서를 확인하세요.

Cloud Healthcare API에 인증하려면 애플리케이션 기본 사용자 인증 정보를 설정합니다. 자세한 내용은 로컬 개발 환경의 인증 설정을 참조하세요.

import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.healthcare.v1.CloudHealthcare;
import com.google.api.services.healthcare.v1.CloudHealthcare.Projects.Locations.Datasets;
import com.google.api.services.healthcare.v1.CloudHealthcareScopes;
import com.google.api.services.healthcare.v1.model.DeidentifyConfig;
import com.google.api.services.healthcare.v1.model.DeidentifyDatasetRequest;
import com.google.api.services.healthcare.v1.model.DicomConfig;
import com.google.api.services.healthcare.v1.model.Operation;
import com.google.api.services.healthcare.v1.model.TagFilterList;
import com.google.auth.http.HttpCredentialsAdapter;
import com.google.auth.oauth2.GoogleCredentials;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

public class DatasetDeIdentify {
  private static final String DATASET_NAME = "projects/%s/locations/%s/datasets/%s";
  private static final JsonFactory JSON_FACTORY = new GsonFactory();
  private static final NetHttpTransport HTTP_TRANSPORT = new NetHttpTransport();

  public static void datasetDeIdentify(String srcDatasetName, String destDatasetName)
      throws IOException {
    // String srcDatasetName =
    //     String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-src-dataset-id");
    // String destDatasetName =
    //    String.format(DATASET_NAME, "your-project-id", "your-region-id", "your-dest-dataset-id");

    // Initialize the client, which will be used to interact with the service.
    CloudHealthcare client = createClient();

    // Configure what information needs to be De-Identified.
    // For more information on de-identifying using tags, please see the following:
    // https://cloud.google.com/healthcare/docs/how-tos/dicom-deidentify#de-identification_using_tags
    TagFilterList tags = new TagFilterList().setTags(Arrays.asList("PatientID"));
    DicomConfig dicomConfig = new DicomConfig().setKeepList(tags);
    DeidentifyConfig config = new DeidentifyConfig().setDicom(dicomConfig);

    // Create the de-identify request and configure any parameters.
    DeidentifyDatasetRequest deidentifyRequest =
        new DeidentifyDatasetRequest().setDestinationDataset(destDatasetName).setConfig(config);
    Datasets.Deidentify request =
        client.projects().locations().datasets().deidentify(srcDatasetName, deidentifyRequest);

    // Execute the request, wait for the operation to complete, and process the results.
    try {
      Operation operation = request.execute();
      while (operation.getDone() == null || !operation.getDone()) {
        // Update the status of the operation with another request.
        Thread.sleep(500); // Pause for 500ms between requests.
        operation =
            client
                .projects()
                .locations()
                .datasets()
                .operations()
                .get(operation.getName())
                .execute();
      }
      System.out.println(
          "De-identified Dataset created. Response content: " + operation.getResponse());
    } catch (Exception ex) {
      System.out.printf("Error during request execution: %s", ex.toString());
      ex.printStackTrace(System.out);
    }
  }

  private static CloudHealthcare createClient() throws IOException {
    // Use Application Default Credentials (ADC) to authenticate the requests
    // For more information see https://cloud.google.com/docs/authentication/production
    GoogleCredentials credential =
        GoogleCredentials.getApplicationDefault()
            .createScoped(Collections.singleton(CloudHealthcareScopes.CLOUD_PLATFORM));

    // Create a HttpRequestInitializer, which will provide a baseline configuration to all requests.
    HttpRequestInitializer requestInitializer =
        request -> {
          new HttpCredentialsAdapter(credential).initialize(request);
          request.setConnectTimeout(60000); // 1 minute connect timeout
          request.setReadTimeout(60000); // 1 minute read timeout
        };

    // Build the client for interacting with the service.
    return new CloudHealthcare.Builder(HTTP_TRANSPORT, JSON_FACTORY, requestInitializer)
        .setApplicationName("your-application-name")
        .build();
  }
}

Node.js

이 샘플을 사용해 보기 전에 Cloud Healthcare API 빠른 시작: 클라이언트 라이브러리 사용Node.js 설정 안내를 따르세요. 자세한 내용은 Cloud Healthcare API Node.js API 참고 문서를 확인하세요.

Cloud Healthcare API에 인증하려면 애플리케이션 기본 사용자 인증 정보를 설정합니다. 자세한 내용은 로컬 개발 환경의 인증 설정을 참조하세요.

const google = require('@googleapis/healthcare');
const healthcare = google.healthcare({
  version: 'v1',
  auth: new google.auth.GoogleAuth({
    scopes: ['https://www.googleapis.com/auth/cloud-platform'],
  }),
});

const deidentifyDataset = async () => {
  // TODO(developer): uncomment these lines before running the sample
  // const cloudRegion = 'us-central1';
  // const projectId = 'adjective-noun-123';
  // const sourceDatasetId = 'my-source-dataset';
  // const destinationDatasetId = 'my-destination-dataset';
  // const keeplistTags = 'PatientID'
  const sourceDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${sourceDatasetId}`;
  const destinationDataset = `projects/${projectId}/locations/${cloudRegion}/datasets/${destinationDatasetId}`;
  const request = {
    sourceDataset: sourceDataset,
    destinationDataset: destinationDataset,
    resource: {
      config: {
        dicom: {
          keepList: {
            tags: [keeplistTags],
          },
        },
      },
    },
  };

  await healthcare.projects.locations.datasets.deidentify(request);
  console.log(
    `De-identified data written from dataset ${sourceDatasetId} to dataset ${destinationDatasetId}`
  );
};

deidentifyDataset();

Python

이 샘플을 사용해 보기 전에 Cloud Healthcare API 빠른 시작: 클라이언트 라이브러리 사용Python 설정 안내를 따르세요. 자세한 내용은 Cloud Healthcare API Python API 참고 문서를 확인하세요.

Cloud Healthcare API에 인증하려면 애플리케이션 기본 사용자 인증 정보를 설정합니다. 자세한 내용은 로컬 개발 환경의 인증 설정을 참조하세요.

# Imports the Dict type for runtime type hints.
from typing import Dict


def deidentify_dataset(
    project_id: str,
    location: str,
    dataset_id: str,
    destination_dataset_id: str,
) -> Dict[str, str]:
    """Uses a DICOM tag keeplist to create a new dataset containing de-identified DICOM data from the source dataset.

    See
    https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/healthcare/api-client/v1/datasets
    before running the sample.
    See https://googleapis.github.io/google-api-python-client/docs/dyn/healthcare_v1.projects.locations.datasets.html#deidentify
    for the Python API reference.

    Args:
      project_id: The project ID or project number of the Google Cloud project you want
          to use.
      location: The name of the dataset's location.
      dataset_id: The ID of the source dataset containing the DICOM store to de-identify.
      destination_dataset_id: The ID of the dataset where de-identified DICOM data
        is written.

    Returns:
      A dictionary representing a long-running operation that results from
      calling the 'DeidentifyDataset' method. Use the
      'google.longrunning.Operation'
      API to poll the operation status.
    """
    # Imports the Python built-in time module.
    import time

    # Imports the Google API Discovery Service.
    from googleapiclient import discovery

    # Imports HttpError from the Google Python API client errors module.
    from googleapiclient.errors import HttpError

    api_version = "v1"
    service_name = "healthcare"
    # Returns an authorized API client by discovering the Healthcare API
    # and using GOOGLE_APPLICATION_CREDENTIALS environment variable.
    client = discovery.build(service_name, api_version)

    # TODO(developer): Uncomment these lines and replace with your values.
    # project_id = 'my-project'
    # location = 'us-central1'
    # dataset_id = 'my-source-dataset'
    # destination_dataset_id = 'my-destination-dataset'
    source_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, dataset_id
    )
    destination_dataset = "projects/{}/locations/{}/datasets/{}".format(
        project_id, location, destination_dataset_id
    )

    body = {
        "destinationDataset": destination_dataset,
        "config": {
            "dicom": {
                "keepList": {
                    "tags": [
                        "Columns",
                        "NumberOfFrames",
                        "PixelRepresentation",
                        "MediaStorageSOPClassUID",
                        "MediaStorageSOPInstanceUID",
                        "Rows",
                        "SamplesPerPixel",
                        "BitsAllocated",
                        "HighBit",
                        "PhotometricInterpretation",
                        "BitsStored",
                        "PatientID",
                        "TransferSyntaxUID",
                        "SOPInstanceUID",
                        "StudyInstanceUID",
                        "SeriesInstanceUID",
                        "PixelData",
                    ]
                }
            }
        },
    }

    request = (
        client.projects()
        .locations()
        .datasets()
        .deidentify(sourceDataset=source_dataset, body=body)
    )

    # Set a start time for operation completion.
    start_time = time.time()
    # TODO(developer): Increase the max_time if de-identifying many resources.
    max_time = 600

    try:
        operation = request.execute()
        while not operation.get("done", False):
            # Poll until the operation finishes.
            print("Waiting for operation to finish...")
            if time.time() - start_time > max_time:
                raise RuntimeError("Timed out waiting for operation to finish.")
            operation = (
                client.projects()
                .locations()
                .datasets()
                .operations()
                .get(name=operation["name"])
                .execute()
            )
            # Wait 5 seconds between each poll to the operation.
            time.sleep(5)

        if operation.get("error"):
            raise TimeoutError(f"De-identify operation failed: {operation['error']}")
        else:
            print(f"De-identified data to dataset: {destination_dataset_id}")
            print(
                f"Resources succeeded: {operation.get('metadata').get('counter').get('success')}"
            )
            print(
                f"Resources failed: {operation.get('metadata').get('counter').get('failure')}"
            )
            return operation

    except HttpError as err:
        # A common error is when the destination dataset already exists.
        if err.resp.status == 409:
            raise RuntimeError(
                f"Destination dataset with ID {destination_dataset_id} already exists."
            )
        else:
            raise err

다음 단계

다른 Google Cloud 제품의 코드 샘플을 검색하고 필터링하려면 Google Cloud 샘플 브라우저를 참고하세요.