Re-identify free text with FPE using a surrogate

Uses the Cloud Data Loss Prevention API to re-identify sensitive data in a string that was encrypted by format-preserving encryption (FPE) with a surrogate type. The encryption is performed with an unwrapped key.

Code sample

Java

To learn how to install and use the client library for Cloud DLP, see Cloud DLP client libraries.

To authenticate to Cloud DLP, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.common.io.BaseEncoding;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CryptoKey;
import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.ReidentifyContentRequest;
import com.google.privacy.dlp.v2.ReidentifyContentResponse;
import com.google.privacy.dlp.v2.UnwrappedCryptoKey;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.Base64;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;

public class ReidentifyFreeTextWithFpeUsingSurrogate {
  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.

    // The Google Cloud project id to use as a parent resource.
    String projectId = "your-project-id";
    // The string to de-identify.
    String textToDeIdentify = "My phone number is 4359916731";
    // The base64-encoded AES-256 key to use.
    String base64EncodedKey = "your-base64-encoded-key";

    // Obtain the de-identified text.
    String textToReIdentify =
        DeidentifyFreeTextWithFpeUsingSurrogate.deIdentifyWithFpeSurrogate(
            projectId, textToDeIdentify, base64EncodedKey);

    reIdentifyWithFpeSurrogate(projectId, textToReIdentify, base64EncodedKey);
  }

  public static void reIdentifyWithFpeSurrogate(
      String projectId, String textToReIdentify, String unwrappedKey) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to ReIdentify
      ContentItem contentItem = ContentItem.newBuilder().setValue(textToReIdentify).build();
      CustomInfoType.SurrogateType surrogateType =
          CustomInfoType.SurrogateType.newBuilder().build();

      // Specify the surrogate type used at time of de-identification
      InfoType surrogateInfoType = InfoType.newBuilder().setName("PHONE_TOKEN").build();

      CustomInfoType customInfoType =
          CustomInfoType.newBuilder()
              .setInfoType(surrogateInfoType)
              .setSurrogateType(surrogateType)
              .build();
      InspectConfig inspectConfig =
          InspectConfig.newBuilder().addCustomInfoTypes(customInfoType).build();

      // Specify an unwrapped crypto key
      UnwrappedCryptoKey unwrappedCryptoKey =
          UnwrappedCryptoKey.newBuilder()
              .setKey(ByteString.copyFrom(BaseEncoding.base64().decode(unwrappedKey)))
              .build();
      CryptoKey cryptoKey = CryptoKey.newBuilder().setUnwrapped(unwrappedCryptoKey).build();

      // Specify how to un-encrypt the previously de-identified information
      CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig =
          CryptoReplaceFfxFpeConfig.newBuilder()
              .setCryptoKey(cryptoKey)
              // Set of characters in the input text. For more info, see
              // https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#DeidentifyTemplate.FfxCommonNativeAlphabet
              .setCommonAlphabet(CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet.NUMERIC)
              .setSurrogateInfoType(surrogateInfoType)
              .build();

      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig)
              .build();

      InfoTypeTransformations.InfoTypeTransformation infoTypeTransformation =
          InfoTypeTransformations.InfoTypeTransformation.newBuilder()
              .setPrimitiveTransformation(primitiveTransformation)
              .build();

      InfoTypeTransformations transformations =
          InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformation).build();

      DeidentifyConfig reidentifyConfig =
          DeidentifyConfig.newBuilder().setInfoTypeTransformations(transformations).build();

      // Combine configurations into a request for the service.
      ReidentifyContentRequest request =
          ReidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setInspectConfig(inspectConfig)
              .setReidentifyConfig(reidentifyConfig)
              .build();
      // Send the request and receive response from the service
      ReidentifyContentResponse response = dlp.reidentifyContent(request);

      // Print the results
      System.out.println("Text after re-identification: " + response.getItem().getValue());
    }
  }
}

Python

To learn how to install and use the client library for Cloud DLP, see Cloud DLP client libraries.

To authenticate to Cloud DLP, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import base64  # noqa: F811, E402, I100

import google.cloud.dlp  # noqa: F811, E402


def reidentify_free_text_with_fpe_using_surrogate(
    project: str,
    input_str: str,
    alphabet: str = "NUMERIC",
    surrogate_type: str = "PHONE_TOKEN",
    unwrapped_key: str = "YWJjZGVmZ2hpamtsbW5vcA==",
) -> None:
    """Uses the Data Loss Prevention API to reidentify sensitive data in a
    string that was encrypted by Format Preserving Encryption (FPE) with
    surrogate type. The encryption is performed with an unwrapped key.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        alphabet: The set of characters to replace sensitive ones with. For
            more information, see https://cloud.google.com/dlp/docs/reference/
            rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
        surrogate_type: The name of the surrogate custom info type to used
            during the encryption process.
        unwrapped_key: The base64-encoded AES-256 key to use.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    unwrapped_key = base64.b64decode(unwrapped_key)

    # Construct Deidentify Config
    transformation = {
        "primitive_transformation": {
            "crypto_replace_ffx_fpe_config": {
                "crypto_key": {"unwrapped": {"key": unwrapped_key}},
                "common_alphabet": alphabet,
                "surrogate_info_type": {"name": surrogate_type},
            }
        }
    }

    reidentify_config = {
        "info_type_transformations": {"transformations": [transformation]}
    }

    inspect_config = {
        "custom_info_types": [
            {"info_type": {"name": surrogate_type}, "surrogate_type": {}}
        ]
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.reidentify_content(
        request={
            "parent": parent,
            "reidentify_config": reidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print results
    print(response.item.value)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.