De-identify sensitive data with a simple word list

Matches against a custom simple word list to de-identify sensitive data.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using System;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;

public class DeidentifyWithSimpleWordList
{
    public static DeidentifyContentResponse Deidentify(string projectId, string text)
    {
        // Instantiate a client.
        var dlp = DlpServiceClient.Create();

        var contentItem = new ContentItem { Value = text };

        var wordList = new CustomInfoType.Types.Dictionary.Types.WordList
        {
            Words = { new string[] { "RM-GREEN", "RM-YELLOW", "RM-ORANGE" } }
        };

        var infoType = new InfoType
        {
            Name = "CUSTOM_ROOM_ID"
        };

        var customInfoType = new CustomInfoType
        {
            InfoType = infoType,
            Dictionary = new CustomInfoType.Types.Dictionary
            {
                WordList = wordList
            }
        };

        var inspectConfig = new InspectConfig
        {
            CustomInfoTypes =
            {
                customInfoType,
            }
        };
        var primitiveTransformation = new PrimitiveTransformation
        {
            ReplaceWithInfoTypeConfig = new ReplaceWithInfoTypeConfig { }
        };

        var transformation = new InfoTypeTransformations.Types.InfoTypeTransformation
        {
            InfoTypes = { infoType },
            PrimitiveTransformation = primitiveTransformation
        };

        var deidentifyConfig = new DeidentifyConfig
        {
            InfoTypeTransformations = new InfoTypeTransformations
            {
                Transformations = { transformation }
            }
        };

        var request = new DeidentifyContentRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            InspectConfig = inspectConfig,
            DeidentifyConfig = deidentifyConfig,
            Item = contentItem
        };

        // Call the API.
        var response = dlp.DeidentifyContent(request);

        // Inspect the results.
        Console.WriteLine($"Deidentified content: {response.Item.Value}");
        return response;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// deidentifyWithWordList matches against a custom simple word list to de-identify sensitive
// data based on the input
func deidentifyWithWordList(w io.Writer, projectID, input string, infoTypeName string, wordList []string) error {
	// projectID := "my-project-id"
	// input := "Patient was seen in RM-YELLOW then transferred to rm green."
	// wordList := []string{"RM-GREEN", "RM-YELLOW", "RM-ORANGE"}

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}
	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify what content you want the service to DeIdentify.
	item := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Value{
			Value: input,
		},
	}

	// Specify the word list custom info type the inspection will look for.
	infoType := &dlppb.InfoType{
		Name: infoTypeName,
	}

	var customInfoType = &dlppb.CustomInfoType{
		InfoType: infoType,
		Type: &dlppb.CustomInfoType_Dictionary_{
			Dictionary: &dlppb.CustomInfoType_Dictionary{
				Source: &dlppb.CustomInfoType_Dictionary_WordList_{
					// Construct the word list to be detected
					WordList: &dlppb.CustomInfoType_Dictionary_WordList{
						Words: wordList,
					},
				},
			},
		},
	}

	// Define type of de-identification as replacement.
	primitiveTransformation := &dlppb.PrimitiveTransformation{
		Transformation: &dlppb.PrimitiveTransformation_ReplaceWithInfoTypeConfig{
			ReplaceWithInfoTypeConfig: &dlppb.ReplaceWithInfoTypeConfig{},
		},
	}

	infoTypeTransformation := &dlppb.InfoTypeTransformations_InfoTypeTransformation{
		InfoTypes:               []*dlppb.InfoType{infoType},
		PrimitiveTransformation: primitiveTransformation,
	}

	infoTypeTransformations := &dlppb.InfoTypeTransformations{
		// Associate de-identification type with info type.
		Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
			infoTypeTransformation,
		},
	}

	// Create a configured request.
	req := &dlppb.DeidentifyContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		InspectConfig: &dlppb.InspectConfig{
			CustomInfoTypes: []*dlppb.CustomInfoType{
				customInfoType,
			},
		},
		// Construct the configuration for the de-identify request and list all desired transformations.
		DeidentifyConfig: &dlppb.DeidentifyConfig{
			Transformation: &dlppb.DeidentifyConfig_InfoTypeTransformations{
				InfoTypeTransformations: infoTypeTransformations,
			},
		},
		// The item to analyze.
		Item: item,
	}

	// Send the request.
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return err
	}

	// Print the result.
	fmt.Fprintf(w, "output : %v", resp.GetItem().GetValue())
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary;
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary.WordList;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.ReplaceWithInfoTypeConfig;
import java.io.IOException;

public class DeIdentifyWithSimpleWordList {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String textToDeIdentify = "Patient was seen in RM-YELLOW then transferred to rm green.";
    deidentifyWithSimpleWordList(projectId, textToDeIdentify);
  }

  public static void deidentifyWithSimpleWordList(String projectId, String textToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {

      // Specify what content you want the service to DeIdentify.
      ContentItem contentItem = ContentItem.newBuilder().setValue(textToDeIdentify).build();

      // Construct the word list to be detected
      Dictionary wordList =
          Dictionary.newBuilder()
              .setWordList(
                  WordList.newBuilder()
                      .addWords("RM-GREEN")
                      .addWords("RM-YELLOW")
                      .addWords("RM-ORANGE")
                      .build())
              .build();

      // Specify the word list custom info type the inspection will look for.
      InfoType infoType = InfoType.newBuilder().setName("CUSTOM_ROOM_ID").build();
      CustomInfoType customInfoType =
          CustomInfoType.newBuilder().setInfoType(infoType).setDictionary(wordList).build();
      InspectConfig inspectConfig =
          InspectConfig.newBuilder().addCustomInfoTypes(customInfoType).build();

      // Define type of deidentification as replacement.
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setReplaceWithInfoTypeConfig(ReplaceWithInfoTypeConfig.getDefaultInstance())
              .build();

      // Associate deidentification type with info type.
      InfoTypeTransformation transformation =
          InfoTypeTransformation.newBuilder()
              .addInfoTypes(infoType)
              .setPrimitiveTransformation(primitiveTransformation)
              .build();

      // Construct the configuration for the Redact request and list all desired transformations.
      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder()
              .setInfoTypeTransformations(
                  InfoTypeTransformations.newBuilder().addTransformations(transformation))
              .build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setInspectConfig(inspectConfig)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results
      System.out.println(
          "Text after replace with infotype config: " + response.getItem().getValue());
    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// TODO(developer): Replace these variables before running the sample.
// const projectId = "your-project-id";

// The string to de-identify
// const textToInspect = 'Patient was seen in RM-YELLOW then transferred to rm green.';

// Words to look for during inspection
// const words = ['RM-GREEN', 'RM-YELLOW', 'RM-ORANGE'];

// Name of the custom info type
// const customInfoTypeName = 'CUSTOM_ROOM_ID';

async function deIdentifyWithSimpleWordList() {
  // Initialize client that will be used to send requests. This client only needs to be created
  // once, and can be reused for multiple requests. After completing all of your requests, call
  // the "close" method on the client to safely clean up any remaining background resources.
  const dlp = new DLP.DlpServiceClient();

  // Construct the word list to be detected
  const wordList = {
    words: words,
  };

  // Specify the word list custom info type the inspection will look for.
  const infoType = {
    name: customInfoTypeName,
  };
  const customInfoType = {
    infoType,
    dictionary: {
      wordList,
    },
  };

  // Construct de-identify configuration
  const deidentifyConfig = {
    infoTypeTransformations: {
      transformations: [
        {
          primitiveTransformation: {
            replaceWithInfoTypeConfig: {},
          },
        },
      ],
    },
  };

  // Construct inspect configuration
  const inspectConfig = {
    customInfoTypes: [customInfoType],
  };

  // Construct Item
  const item = {
    value: textToInspect,
  };
  // Combine configurations into a request for the service.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    item,
    deidentifyConfig,
    inspectConfig,
  };

  // Send the request and receive response from the service
  const [response] = await dlp.deidentifyContent(request);
  // Print the results
  console.log(
    `Text after replace with infotype config: ${response.item.value}`
  );
}

deIdentifyWithSimpleWordList();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CustomInfoType;
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary;
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary\WordList;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InfoTypeTransformations;
use Google\Cloud\Dlp\V2\InfoTypeTransformations\InfoTypeTransformation;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
use Google\Cloud\Dlp\V2\ReplaceWithInfoTypeConfig;

/**
 * De-identify sensitive data with a simple word list
 * Matches against a custom simple word list to de-identify sensitive data.
 *
 * @param string $callingProjectId  The Google Cloud project id to use as a parent resource.
 * @param string $string            The string to deidentify (will be treated as text).
 */

function deidentify_simple_word_list(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $string = 'Patient was seen in RM-YELLOW then transferred to rm green.'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    $content = (new ContentItem())
        ->setValue($string);

    // Construct the word list to be detected
    $wordList = (new Dictionary())
        ->setWordList((new WordList())
            ->setWords(['RM-GREEN', 'RM-YELLOW', 'RM-ORANGE']));

    // The infoTypes of information to mask
    $custoMRoomIdinfoType = (new InfoType())
        ->setName('CUSTOM_ROOM_ID');
    $customInfoType = (new CustomInfoType())
        ->setInfoType($custoMRoomIdinfoType)
        ->setDictionary($wordList);

    // Create the configuration object
    $inspectConfig = (new InspectConfig())
        ->setCustomInfoTypes([$customInfoType]);

    // Create the information transform configuration objects
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setReplaceWithInfoTypeConfig(new ReplaceWithInfoTypeConfig());

    $infoTypeTransformation = (new InfoTypeTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setInfoTypes([$custoMRoomIdinfoType]);

    $infoTypeTransformations = (new InfoTypeTransformations())
        ->setTransformations([$infoTypeTransformation]);

    // Create the deidentification configuration object
    $deidentifyConfig = (new DeidentifyConfig())
        ->setInfoTypeTransformations($infoTypeTransformations);

    // Run request
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($content)
        ->setInspectConfig($inspectConfig);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Print the results
    printf('Deidentified content: %s', $response->getItem()->getValue());
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import google.cloud.dlp


def deidentify_with_simple_word_list(
    project: str,
    input_str: str,
    custom_info_type_name: str,
    word_list: list[str],
) -> None:
    """Uses the Data Loss Prevention API to de-identify sensitive data in a
      string by matching against custom word list.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        custom_info_type_name: The name of the custom info type to use.
        word_list: The list of strings to match against.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Prepare custom_info_types by parsing word lists
    word_list = {"words": word_list}
    custom_info_types = [
        {
            "info_type": {"name": custom_info_type_name},
            "dictionary": {"word_list": word_list},
        }
    ]

    # Construct the configuration dictionary
    inspect_config = {
        "custom_info_types": custom_info_types,
    }

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {"primitive_transformation": {"replace_with_info_type_config": {}}}
            ]
        }
    }

    # Construct the `item`.
    item = {"value": input_str}

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    print(f"De-identified Content: {response.item.value}")

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.