De-identify free text with FPE by using a surrogate

Uses the Data Loss Prevention API to de-identify sensitive data in a string using format-preserving encryption (FPE). The encryption is performed with an unwrapped key.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using System;
using System.Collections.Generic;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using Google.Protobuf;
using static Google.Cloud.Dlp.V2.CryptoReplaceFfxFpeConfig.Types;

public class DeidentifyFreeTextWithFpeUsingSurrogate
{
    public static DeidentifyContentResponse Deidentify(
        string projectId,
        string text,
        string unwrappedKey,
        IEnumerable<InfoType> infoTypes = null,
        FfxCommonNativeAlphabet alphabet = FfxCommonNativeAlphabet.Numeric,
        InfoType surrogateType = null)
    {
        // Instantiate a client.
        var dlp = DlpServiceClient.Create();

        // Specify the type of info to be inspected and construct the inspect config.
        var inspectConfig = new InspectConfig
        {
            InfoTypes =
            {
                infoTypes ?? new InfoType[]
                {
                    new InfoType { Name = "PHONE_NUMBER" }
                }
            },
            MinLikelihood = Likelihood.Unlikely
        };

        // Construct the crypto replace ffxfpe config by providing the unwrapped crypto key.
        var cryptoReplaceFfxFpeConfig = new CryptoReplaceFfxFpeConfig
        {
            CommonAlphabet = alphabet,
            CryptoKey = new CryptoKey
            {
                Unwrapped = new UnwrappedCryptoKey
                {
                    Key = ByteString.FromBase64(unwrappedKey)
                }
            },
            SurrogateInfoType = surrogateType ?? new InfoType { Name = "PHONE_TOKEN" }
        };

        // Construct the deidentify config using crypto config created above.
        var deidentifyConfig = new DeidentifyConfig
        {
            InfoTypeTransformations = new InfoTypeTransformations
            {
                Transformations =
                {
                    new InfoTypeTransformations.Types.InfoTypeTransformation
                    {
                        PrimitiveTransformation = new PrimitiveTransformation
                        {
                            CryptoReplaceFfxFpeConfig = cryptoReplaceFfxFpeConfig
                        }
                    }
                }
            }
        };

        // Construct the request.
        var request = new DeidentifyContentRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            DeidentifyConfig = deidentifyConfig,
            InspectConfig = inspectConfig,
            Item = new ContentItem { Value = text }
        };

        // Call the API.
        DeidentifyContentResponse response = dlp.DeidentifyContent(request);

        // Check the de-identified content.
        Console.WriteLine($"De-identified content: {response.Item.Value}");
        return response;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"encoding/base64"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// deidentifyFreeTextWithFPEUsingSurrogate de-identifies free text with FPE by using a surrogate
func deidentifyFreeTextWithFPEUsingSurrogate(w io.Writer, projectID, inputStr, infoType, surrogateType, unwrappedKey string) error {
	// projectId := "your-project-id"
	// inputStr := "My phone number is 9824376677"
	// infoType := "PHONE_NUMBER"
	// surrogateType := "PHONE_TOKEN"
	// unwrappedKey := "The base64-encoded AES-256 key to use"

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// decoding the unwrapped key
	keyDecode, err := base64.StdEncoding.DecodeString(unwrappedKey)
	if err != nil {
		return err
	}

	inspectConfig := &dlppb.InspectConfig{
		InfoTypes: []*dlppb.InfoType{
			{Name: infoType},
		},
		MinLikelihood: dlppb.Likelihood_UNLIKELY,
	}

	// specify the content to be de-identified
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Value{
			Value: inputStr,
		},
	}

	// Specify how the content should be encrypted.
	cryptoReplaceConfig := &dlppb.CryptoReplaceFfxFpeConfig{
		CryptoKey: &dlppb.CryptoKey{
			Source: &dlppb.CryptoKey_Unwrapped{
				Unwrapped: &dlppb.UnwrappedCryptoKey{
					Key: keyDecode,
				},
			},
		},
		Alphabet: &dlppb.CryptoReplaceFfxFpeConfig_CommonAlphabet{
			CommonAlphabet: dlppb.CryptoReplaceFfxFpeConfig_NUMERIC,
		},
		SurrogateInfoType: &dlppb.InfoType{
			Name: surrogateType,
		},
	}

	// Apply crypto replace config to primitive transformation.
	primitiveTransformation := &dlppb.PrimitiveTransformation{
		Transformation: &dlppb.PrimitiveTransformation_CryptoReplaceFfxFpeConfig{
			CryptoReplaceFfxFpeConfig: cryptoReplaceConfig,
		},
	}

	infoTypeTransformation := &dlppb.DeidentifyConfig_InfoTypeTransformations{
		InfoTypeTransformations: &dlppb.InfoTypeTransformations{
			Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
				{
					InfoTypes: []*dlppb.InfoType{
						{Name: infoType},
					},
					PrimitiveTransformation: primitiveTransformation,
				},
			},
		},
	}

	// Construct the de-identification request to be sent by the client.
	req := &dlppb.DeidentifyContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		DeidentifyConfig: &dlppb.DeidentifyConfig{
			Transformation: infoTypeTransformation,
		},
		InspectConfig: inspectConfig,
		Item:          contentItem,
	}

	// Send the request.
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return err
	}

	// Print the results.
	fmt.Fprintf(w, "output: %v", resp.GetItem().GetValue())
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.common.io.BaseEncoding;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CryptoKey;
import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.UnwrappedCryptoKey;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.Base64;
import java.util.Collections;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;

public class DeidentifyFreeTextWithFpeUsingSurrogate {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.

    // The Google Cloud project id to use as a parent resource.
    String projectId = "your-project-id";
    // The string to de-identify.
    String textToDeIdentify = "My phone number is 4359916732";
    // The base64-encoded key to use.
    String base64EncodedKey = "your-base64-encoded-key";

    deIdentifyWithFpeSurrogate(projectId, textToDeIdentify, base64EncodedKey);
  }

  /**
   * Uses the Data Loss Prevention API to deidentify sensitive data in a string using Format
   * Preserving Encryption (FPE).The encryption is performed with an unwrapped key.
   *
   * @param projectId The Google Cloud project id to use as a parent resource.
   * @param textToDeIdentify The string to deidentify.
   * @param unwrappedKey The base64-encoded AES-256 key to use.
   */
  public static String deIdentifyWithFpeSurrogate(
      String projectId, String textToDeIdentify, String unwrappedKey) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Set the text to be de-identified.
      ContentItem contentItem = ContentItem.newBuilder().setValue(textToDeIdentify).build();

      // Specify the InfoType the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      InfoType infoType = InfoType.newBuilder()
              .setName("PHONE_NUMBER").build();

      InspectConfig inspectConfig =
          InspectConfig.newBuilder()
                  .addAllInfoTypes(Collections.singletonList(infoType)).build();

      // Specify an unwrapped crypto key.
      UnwrappedCryptoKey unwrappedCryptoKey =
          UnwrappedCryptoKey.newBuilder()
              .setKey(ByteString.copyFrom(BaseEncoding.base64().decode(unwrappedKey)))
              .build();

      CryptoKey cryptoKey = CryptoKey.newBuilder().setUnwrapped(unwrappedCryptoKey).build();

      InfoType surrogateInfoType = InfoType.newBuilder().setName("PHONE_TOKEN").build();

      // Specify how the info from the inspection should be encrypted.
      CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig =
          CryptoReplaceFfxFpeConfig.newBuilder()
              .setCryptoKey(cryptoKey)
              // Set of characters in the input text. For more info, see
              // https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#DeidentifyTemplate.FfxCommonNativeAlphabet
              .setCommonAlphabet(CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet.NUMERIC)
              .setSurrogateInfoType(surrogateInfoType)
              .build();

      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig)
              .build();

      InfoTypeTransformation infoTypeTransformation =
          InfoTypeTransformation.newBuilder()
              .setPrimitiveTransformation(primitiveTransformation)
              .build();

      InfoTypeTransformations transformations =
          InfoTypeTransformations.newBuilder()
                  .addTransformations(infoTypeTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder()
                  .setInfoTypeTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setInspectConfig(inspectConfig)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Text after de-identification: " + response.getItem().getValue());

      return response.getItem().getValue();
    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// The string to de-identify
// const string = 'My phone number is 4359916732';

// The set of characters to replace sensitive ones with
// For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet
// const alphabet = 'NUMERIC';

// InfoTypes
// const infoTypes = [{name: 'PHONE_NUMBER'}];

// Surrogate Type
// const surrogateType = 'PHONE_TOKEN';

// The base64-encoded AES-256 key to use
// const unwrappedKey = 'YWJjZGVmZ2hpamtsbW5vcA==';

async function deidentifyWithFpeSurrogate() {
  // Specify an unwrapped crypto key.
  unwrappedKey = Buffer.from(unwrappedKey, 'base64');

  // Specify how the info from the inspection should be encrypted.
  const cryptoReplaceFfxFpeConfig = {
    cryptoKey: {
      unwrapped: {
        key: unwrappedKey,
      },
    },
    commonAlphabet: alphabet,
    surrogateInfoType: {name: surrogateType},
  };

  // Construct the inspect configuration.
  const inspectConfig = {
    infoTypes: infoTypes,
    minLikelihood: DLP.protos.google.privacy.dlp.v2.Likelihood.UNLIKELY,
  };

  // Set the text to be de-identified.
  const item = {value: string};

  // Combine configurations into a request for the service.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    deidentifyConfig: {
      infoTypeTransformations: {
        transformations: [
          {
            primitiveTransformation: {
              cryptoReplaceFfxFpeConfig: cryptoReplaceFfxFpeConfig,
            },
          },
        ],
      },
    },
    item: item,
    inspectConfig: inspectConfig,
  };
  // Run de-identification request
  const [response] = await dlp.deidentifyContent(request);
  const deidentifiedItem = response.item;
  // Print results
  console.log(deidentifiedItem.value);
}
deidentifyWithFpeSurrogate();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


/**
 * Deidentify free text with fpe using surrogate.
 * Uses the Data Loss Prevention API to de-identify sensitive data in a string using format-preserving
 * encryption (FPE). The encryption is performed with an unwrapped key.
 *
 * @param string $callingProjectId  The GCP Project ID to run the API call under.
 * @param string $string            The string to deidentify (will be treated as text).
 * @param string $unwrappedKey      The base64-encoded AES-256 key to use.
 * @param string $surrogateTypeName The name of the surrogate custom info type to use.
 * Can be essentially any arbitrary string, as long as it doesn't appear in your dataset otherwise.
 */
function deidentify_free_text_with_fpe_using_surrogate(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $string = 'My PHONE NUMBER IS 731997681',
    string $unwrappedKey = 'YWJjZGVmZ2hpamtsbW5vcA==',
    string $surrogateTypeName = 'PHONE_TOKEN'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Specify an unwrapped crypto key.
    $unwrapped = (new UnwrappedCryptoKey())
        ->setKey(base64_decode($unwrappedKey));

    $cryptoKey = (new CryptoKey())
        ->setUnwrapped($unwrapped);

    // Create the surrogate type configuration object.
    $surrogateType = (new InfoType())
        ->setName($surrogateTypeName);

    // The set of characters to replace sensitive ones with.
    // For more information, see https://cloud.google.com/dlp/docs/reference/rest/V2/organizations.deidentifyTemplates#ffxcommonnativealphabet
    $commonAlphabet = FfxCommonNativeAlphabet::NUMERIC;

    // Specify how to decrypt the previously de-identified information.
    $cryptoReplaceFfxFpeConfig = (new CryptoReplaceFfxFpeConfig())
        ->setCryptoKey($cryptoKey)
        ->setCommonAlphabet($commonAlphabet)
        ->setSurrogateInfoType($surrogateType);

    // Create the information transform configuration objects.
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setCryptoReplaceFfxFpeConfig($cryptoReplaceFfxFpeConfig);

    // The infoTypes of information to mask.
    $infoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $infoTypes = [$infoType];

    $infoTypeTransformation = (new InfoTypeTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setInfoTypes($infoTypes);

    $infoTypeTransformations = (new InfoTypeTransformations())
        ->setTransformations([$infoTypeTransformation]);

    // Create the deidentification configuration object.
    $deidentifyConfig = (new DeidentifyConfig())
        ->setInfoTypeTransformations($infoTypeTransformations);

    // Specify the content to be de-identify.
    $content = (new ContentItem())
        ->setValue($string);

    // Create the configuration object.
    $inspectConfig = (new InspectConfig())
        /* Construct the inspect config, trying to finding all PII with likelihood
        higher than UNLIKELY */
        ->setMinLikelihood(likelihood::UNLIKELY)
        ->setInfoTypes($infoTypes);

    // Run request.
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($content)
        ->setInspectConfig($inspectConfig);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Print the results.
    printf($response->getItem()->getValue());
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import base64

import google.cloud.dlp


def deidentify_free_text_with_fpe_using_surrogate(
    project: str,
    input_str: str,
    alphabet: str = "NUMERIC",
    info_type: str = "PHONE_NUMBER",
    surrogate_type: str = "PHONE_TOKEN",
    unwrapped_key: str = "YWJjZGVmZ2hpamtsbW5vcA==",
) -> None:
    """Uses the Data Loss Prevention API to deidentify sensitive data in a
       string using Format Preserving Encryption (FPE).
       The encryption is performed with an unwrapped key.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        alphabet: The set of characters to replace sensitive ones with. For
            more information, see https://cloud.google.com/dlp/docs/reference/
            rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
        info_type: The name of the info type to de-identify
        surrogate_type: The name of the surrogate custom info type to use. Can
            be essentially any arbitrary string, as long as it doesn't appear
            in your dataset otherwise.
        unwrapped_key: The base64-encoded AES-256 key to use.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    unwrapped_key = base64.b64decode(unwrapped_key)

    # Construct de-identify config
    transformation = {
        "info_types": [{"name": info_type}],
        "primitive_transformation": {
            "crypto_replace_ffx_fpe_config": {
                "crypto_key": {"unwrapped": {"key": unwrapped_key}},
                "common_alphabet": alphabet,
                "surrogate_info_type": {"name": surrogate_type},
            }
        },
    }

    deidentify_config = {
        "info_type_transformations": {"transformations": [transformation]}
    }

    # Construct the inspect config, trying to finding all PII with likelihood
    # higher than UNLIKELY
    inspect_config = {
        "info_types": [{"name": info_type}],
        "min_likelihood": google.cloud.dlp_v2.Likelihood.UNLIKELY,
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print results
    print(response.item.value)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.