De-identify content through deterministic encryption

Use the Data Loss Prevention API to de-identify sensitive data in a string using deterministic encryption, which is a reversible cryptographic method. The encryption is performed with a wrapped key.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using System;
using System.Collections.Generic;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;

public class DeidentifyWithDeterministic
{
    public static DeidentifyContentResponse Deidentify(
        string projectId,
        string text,
        string keyName,
        string wrapperKey,
        InfoType surrogateType = null,
        IEnumerable<InfoType> infoTypes = null)
    {
        // Instantiate a client.
        var dlp = DlpServiceClient.Create();

        // Construct the inspect config by specifying the type of info to be inspected.
        var inspectConfig = new InspectConfig
        {
            InfoTypes =
            {
                infoTypes ?? new InfoType[] { new InfoType { Name = "PHONE_NUMBER" } }
            }
        };

        // Construct the crypto deterministic config by providing key name, wrapped key and surrogate type.
        var cryptoDeterministicConfig = new CryptoDeterministicConfig
        {
            CryptoKey = new CryptoKey
            {
                KmsWrapped = new KmsWrappedCryptoKey
                {
                    CryptoKeyName = keyName,
                    WrappedKey = Google.Protobuf.ByteString.FromBase64(wrapperKey)
                }
            },
            SurrogateInfoType = surrogateType ?? new InfoType { Name = "PHONE_TOKEN" }
        };


        // Construct the deidentify config using crypto deterministic config.
        var deidentifyConfig = new DeidentifyConfig
        {
            InfoTypeTransformations = new InfoTypeTransformations
            {
                Transformations =
                {
                    new InfoTypeTransformations.Types.InfoTypeTransformation
                    {
                        PrimitiveTransformation = new PrimitiveTransformation
                        {
                            CryptoDeterministicConfig = cryptoDeterministicConfig
                        }
                    }
                }
            }
        };

        // Construct the request.
        var request = new DeidentifyContentRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            DeidentifyConfig = deidentifyConfig,
            InspectConfig = inspectConfig,
            Item = new ContentItem { Value = text }
        };

        // Call the API.
        DeidentifyContentResponse response = dlp.DeidentifyContent(request);

        // Check the de-identified content.
        Console.WriteLine($"De-identified content: {response.Item.Value}");
        return response;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import (
	"context"
	"encoding/base64"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// deIdentifyDeterministicEncryption de-identifies through deterministic encryption
func deIdentifyDeterministicEncryption(w io.Writer, projectID, inputStr string, infoTypeNames []string, keyFileName, cryptoKeyName, surrogateInfoType string) error {
	// projectId := "your-project-id"
	// inputStr := "My SSN is 111111111"
	// infoTypeNames := []string{"US_SOCIAL_SECURITY_NUMBER"}
	/* keyFileName :=  "projects/YOUR_PROJECT/"
	   + "locations/YOUR_KEYRING_REGION/"
	   + "keyRings/YOUR_KEYRING_NAME/"
	   + "cryptoKeys/YOUR_KEY_NAME"
	*/
	// cryptoKeyName := "YOUR_ENCRYPTED_AES_256_KEY"
	// surrogateInfoType := "SSN_TOKEN"

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify an encrypted AES-256 key and the name of the Cloud KMS key that encrypted it.
	wrappedKey, err := base64.StdEncoding.DecodeString(cryptoKeyName)
	if err != nil {
		return err
	}

	// Specify the type of info the inspection will look for.
	// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
	var infoTypes []*dlppb.InfoType
	for _, it := range infoTypeNames {
		infoTypes = append(infoTypes, &dlppb.InfoType{Name: it})
	}

	// Specify the key used by the encryption function for deterministic encryption.
	cryptoReplaceDeterministicConfig := &dlppb.CryptoDeterministicConfig{
		CryptoKey: &dlppb.CryptoKey{
			Source: &dlppb.CryptoKey_KmsWrapped{
				KmsWrapped: &dlppb.KmsWrappedCryptoKey{
					WrappedKey:    wrappedKey,
					CryptoKeyName: keyFileName,
				},
			},
		},
		SurrogateInfoType: &dlppb.InfoType{
			Name: surrogateInfoType,
		},
	}

	// Specifying the info-types to look for.
	inspectConfig := &dlppb.InspectConfig{
		InfoTypes: infoTypes,
	}

	// Specify what content you want the service to de-identify.
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Value{
			Value: inputStr,
		},
	}

	// Specifying the deterministic crypto.
	primitiveTransformation := &dlppb.PrimitiveTransformation{
		Transformation: &dlppb.PrimitiveTransformation_CryptoDeterministicConfig{
			CryptoDeterministicConfig: cryptoReplaceDeterministicConfig,
		},
	}

	// Construct a de-identification config for de-identify deterministic request.
	deIdentifyConfig := &dlppb.DeidentifyConfig{
		Transformation: &dlppb.DeidentifyConfig_InfoTypeTransformations{
			InfoTypeTransformations: &dlppb.InfoTypeTransformations{
				Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
					{
						PrimitiveTransformation: primitiveTransformation,
					},
				},
			},
		},
	}

	// Construct the de-identification request to be sent by the client.
	req := &dlppb.DeidentifyContentRequest{
		Parent:           fmt.Sprintf("projects/%s/locations/global", projectID),
		DeidentifyConfig: deIdentifyConfig,
		InspectConfig:    inspectConfig,
		Item:             contentItem,
	}

	// Send the request.
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return err
	}

	// Print the results.
	fmt.Fprintf(w, "output : %v", resp.GetItem().GetValue())
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CryptoDeterministicConfig;
import com.google.privacy.dlp.v2.CryptoKey;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.KmsWrappedCryptoKey;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import org.apache.commons.codec.binary.Base64;

public class DeIdenitfyWithDeterministicEncryption {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.

    //The Google Cloud project id to use as a parent resource.
    String projectId = "your-project-id";
    // The string to de-identify.
    String textToDeIdentify = "My SSN is 372819127";
    // The encrypted ('wrapped') AES-256 key to use.
    // This key should be encrypted using the Cloud KMS key specified by key_name.
    String wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY";
    // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key.
    String kmsKeyName =
        "projects/YOUR_PROJECT/"
            + "locations/YOUR_KEYRING_REGION/"
            + "keyRings/YOUR_KEYRING_NAME/"
            + "cryptoKeys/YOUR_KEY_NAME";
    deIdentifyWithDeterministicEncryption(projectId, textToDeIdentify, wrappedKey, kmsKeyName);
  }

  // De-identifies sensitive data in a string using deterministic encryption. The encryption is
  // performed with a wrapped key.
  public static String deIdentifyWithDeterministicEncryption(
      String projectId, String textToDeIdentify, String wrappedKey, String key) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder()
              .setValue(textToDeIdentify)
              .build();

      // Specify the type of info the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      InfoType infoType = InfoType.newBuilder()
              .setName("US_SOCIAL_SECURITY_NUMBER")
              .build();

      InspectConfig inspectConfig = InspectConfig.newBuilder()
                  .addAllInfoTypes(Collections.singletonList(infoType))
                  .build();

      // Specify an encrypted AES-256 key and the name of the Cloud KMS key that encrypted it.
      KmsWrappedCryptoKey unwrappedCryptoKey = KmsWrappedCryptoKey.newBuilder()
              .setWrappedKey(ByteString.copyFrom(
                      Base64.decodeBase64(wrappedKey.getBytes(StandardCharsets.UTF_8))))
              .setCryptoKeyName(key)
              .build();

      CryptoKey cryptoKey = CryptoKey.newBuilder()
              .setKmsWrapped(unwrappedCryptoKey)
              .build();

      // Specify how the info from the inspection should be encrypted.
      InfoType surrogateInfoType = InfoType.newBuilder()
              .setName("SSN_TOKEN")
              .build();

      CryptoDeterministicConfig cryptoDeterministicConfig = CryptoDeterministicConfig.newBuilder()
              .setSurrogateInfoType(surrogateInfoType)
              .setCryptoKey(cryptoKey)
              .build();

      PrimitiveTransformation primitiveTransformation = PrimitiveTransformation.newBuilder()
              .setCryptoDeterministicConfig(cryptoDeterministicConfig)
              .build();

      InfoTypeTransformations.InfoTypeTransformation infoTypeTransformation =
              InfoTypeTransformations.InfoTypeTransformation.newBuilder()
              .setPrimitiveTransformation(primitiveTransformation)
              .build();

      InfoTypeTransformations transformations = InfoTypeTransformations.newBuilder()
              .addTransformations(infoTypeTransformation)
              .build();

      DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder()
              .setInfoTypeTransformations(transformations)
              .build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setInspectConfig(inspectConfig)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println(
          "Text after de-identification: " + response.getItem().getValue());

      return response.getItem().getValue();

    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// The string to deidentify
// const string = 'My name is Alicia Abernathy, and my email address is aabernathy@example.com.';

// The infoTypes of information to match
// const infoTypes = [{ name: 'EMAIL_ADDRESS' }];

// The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
// const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';

// The encrypted ('wrapped') AES-256 key to use
// This key should be encrypted using the Cloud KMS key specified above
// const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY'

// The name of the surrogate custom info type to use
// Only necessary if you want to reverse the deidentification process
// Can be essentially any arbitrary string, as long as it doesn't appear
// in your dataset otherwise.
// const surrogateType = 'EMAIL_ADDRESS_TOKEN';

async function deidentifyDeterministic() {
  // Specify an encrypted AES-256 key and the name of the Cloud KMS key that encrypted it
  const cryptoDeterministicEncryption = {
    cryptoKey: {
      kmsWrapped: {
        wrappedKey: wrappedKey,
        cryptoKeyName: keyName,
      },
    },
    surrogateInfoType: {name: surrogateType},
  };

  // Construct inspect configuration to match information
  const inspectConfig = {
    infoTypes,
  };

  // Associate the encryption with the infotype transformation.
  const infoTypeTransformations = {
    transformations: [
      {
        infoTypes,
        primitiveTransformation: {
          cryptoDeterministicConfig: cryptoDeterministicEncryption,
        },
      },
    ],
  };

  // Construct item to inspect
  const item = {value: string};

  // Combine configurations into a request for the service.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    deidentifyConfig: {
      infoTypeTransformations: infoTypeTransformations,
    },
    inspectConfig,
    item: item,
  };

  // Run de-identification request
  const [response] = await dlp.deidentifyContent(request);
  const deidentifiedItem = response.item;

  // Print results
  console.log(deidentifiedItem.value);
}
await deidentifyDeterministic();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CryptoDeterministicConfig;
use Google\Cloud\Dlp\V2\CryptoKey;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InfoTypeTransformations;
use Google\Cloud\Dlp\V2\InfoTypeTransformations\InfoTypeTransformation;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\KmsWrappedCryptoKey;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;

/**
 * De-identify content through deterministic encryption.
 * Use the Data Loss Prevention API to de-identify sensitive data in a string using deterministic encryption,
 * which is a reversible cryptographic method. The encryption is performed with a wrapped key.
 *
 * @param string $callingProjectId  The Google Cloud project id to use as a parent resource.
 * @param string $inputString       The string to deidentify (will be treated as text).
 * @param string $kmsKeyName        The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key.
 * Example: key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'.
 * @param string $infoTypeName      The Info type name to be inspect.
 * @param string $surrogateTypeName The name of the surrogate custom info type to use.
 * Only necessary if you want to reverse the deidentification process. Can be essentially any arbitrary
 * string, as long as it doesn't appear in your dataset otherwise.
 * @param string $wrappedAesKey     The encrypted ('wrapped') AES-256 key to use.
 *
 * */

function deidentify_deterministic(
    string $callingProjectId,
    string $kmsKeyName,
    string $wrappedAesKey,
    string $inputString = 'My PHONE NUMBER IS 731997681',
    string $infoTypeName = 'PHONE_NUMBER',
    string $surrogateTypeName = 'PHONE_TOKEN'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Specify what content you want the service to DeIdentify.
    $content = (new ContentItem())->setValue($inputString);

    // Specify an encrypted AES-256 key and the name of the Cloud KMS key that encrypted it.
    $kmsWrappedCryptoKey = (new KmsWrappedCryptoKey())
        ->setWrappedKey(base64_decode($wrappedAesKey))
        ->setCryptoKeyName($kmsKeyName);

    $cryptoKey = (new CryptoKey())
        ->setKmsWrapped($kmsWrappedCryptoKey);

    // Specify how the info from the inspection should be encrypted.
    $cryptoDeterministicConfig = (new CryptoDeterministicConfig())
        ->setCryptoKey($cryptoKey);

    if (!empty($surrogateTypeName)) {
        $cryptoDeterministicConfig = $cryptoDeterministicConfig->setSurrogateInfoType((new InfoType())
            ->setName($surrogateTypeName));
    }

    // Specify the type of info the inspection will look for.
    $infoType = (new InfoType())
        ->setName($infoTypeName);

    $inspectConfig = (new InspectConfig())
        ->setInfoTypes([$infoType]);

    $primitiveTransformation = (new PrimitiveTransformation())
        ->setCryptoDeterministicConfig($cryptoDeterministicConfig);

    $infoTypeTransformation = (new InfoTypeTransformation())
        ->setPrimitiveTransformation($primitiveTransformation);

    $infoTypeTransformations = (new InfoTypeTransformations())
        ->setTransformations([$infoTypeTransformation]);

    $deidentifyConfig = (new DeidentifyConfig())
        ->setInfoTypeTransformations($infoTypeTransformations);

    // Send the request and receive response from the service.
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($content)
        ->setInspectConfig($inspectConfig);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Print the results.
    printf($response->getItem()->getValue());
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import base64
from typing import List

import google.cloud.dlp


def deidentify_with_deterministic(
    project: str,
    input_str: str,
    info_types: List[str],
    surrogate_type: str = None,
    key_name: str = None,
    wrapped_key: str = None,
) -> None:
    """Deidentifies sensitive data in a string using deterministic encryption.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        info_types: A list of strings representing info types to look for.
        surrogate_type: The name of the surrogate custom info type to use. Only
            necessary if you want to reverse the deidentification process. Can
            be essentially any arbitrary string, as long as it doesn't appear
            in your dataset otherwise.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The encrypted ('wrapped') AES-256 key to use. This key
            should be encrypted using the Cloud KMS key specified by key_name.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    wrapped_key = base64.b64decode(wrapped_key)

    # Construct Deterministic encryption configuration dictionary
    crypto_replace_deterministic_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
        },
    }

    # Add surrogate type
    if surrogate_type:
        crypto_replace_deterministic_config["surrogate_info_type"] = {
            "name": surrogate_type
        }

    # Construct inspect configuration dictionary
    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {
                    "primitive_transformation": {
                        "crypto_deterministic_config": crypto_replace_deterministic_config
                    }
                }
            ]
        }
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print results
    print(response.item.value)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.