Examples of de-identifying tabular data

Cloud Data Loss Prevention (DLP) can detect, classify, and de-identify sensitive data within structured data. When de-identifying content as a table, the structure and columns provide Cloud DLP with additional clues that may enable it to provide better results for some use cases. For example, you can scan a single column for a certain data type instead of the entire table structure.

This topic provides examples of how to configure de-identification of sensitive data within structured text. De-identification is enabled through record transformations. These transformations are applied to values within tabular text data that are identified as a specific infoType, or to an entire column of tabular data.

This topic also provides examples of tabular data transformations using the cryptographic hash method. The cryptographic transformation methods are unique because of their requirement of a cryptographic key.

The JSON given in the following examples can be inserted into any de-identification request inside the "deidentifyConfig" (DeidentifyConfig) attribute. Click the "APIs Explorer example" link to try out the example JSON in APIs Explorer.

Transform a column without inspection

To transform a specific column in which the content is already known, you can skip inspection and specify a transformation directly. The example following the table buckets the "HAPPINESS SCORE" column into increments of 10.

Input Transformed table
AGE PATIENT HAPPINESS SCORE
101 Charles Dickens 95
22 Jane Austen 21
55 Mark Twain 75
AGE PATIENT HAPPINESS SCORE
101 Charles Dickens 90:100
22 Jane Austen 20:30
55 Mark Twain 70:80

Java

To learn how to install and use the client library for Cloud DLP, see the Cloud DLP Client Libraries.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.FixedSizeBucketingConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;

public class DeIdentifyTableBucketing {

  public static void deIdentifyTableBucketing() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .build())
            .build();

    deIdentifyTableBucketing(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableBucketing(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify how the content should be de-identified.
      FixedSizeBucketingConfig fixedSizeBucketingConfig =
          FixedSizeBucketingConfig.newBuilder()
              .setBucketSize(10)
              .setLowerBound(Value.newBuilder().setIntegerValue(0).build())
              .setUpperBound(Value.newBuilder().setIntegerValue(100).build())
              .build();
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setFixedSizeBucketingConfig(fixedSizeBucketingConfig)
              .build();

      // Specify field to be encrypted.
      FieldId fieldId = FieldId.newBuilder().setName("HAPPINESS SCORE").build();

      // Associate the encryption with the specified field.
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .setPrimitiveTransformation(primitiveTransformation)
              .addFields(fieldId)
              .build();
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

API Explorer example

"deidentifyConfig":{
  "recordTransformations":{
    "fieldTransformations":[
      {
        "fields":[
          {
            "name":"HAPPINESS SCORE"
          }
        ],
        "primitiveTransformation":{
          "fixedSizeBucketingConfig":{
            "bucketSize":10,
            "lowerBound":{
              "integerValue":"0"
            },
            "upperBound":{
              "integerValue":"100"
            }
          }
        }
      }
    ]
  }
}

Transform a column based on the value of another column

You can transform a column based on the value of another. This example redacts "HAPPINESS SCORE" for all patients over 89.

Input Transformed table
AGE PATIENT HAPPINESS SCORE
101 Charles Dickens 95
22 Jane Austen 21
55 Mark Twain 75
AGE PATIENT HAPPINESS SCORE
101 Charles Dickens **
22 Jane Austen 21
55 Mark Twain 75

Java

To learn how to install and use the client library for Cloud DLP, see the Cloud DLP Client Libraries.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.CharacterMaskConfig;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordCondition;
import com.google.privacy.dlp.v2.RecordCondition.Condition;
import com.google.privacy.dlp.v2.RecordCondition.Conditions;
import com.google.privacy.dlp.v2.RecordCondition.Expressions;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.RelationalOperator;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;

public class DeIdentifyTableConditionMasking {

  public static void deIdentifyTableConditionMasking() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .build())
            .build();

    deIdentifyTableConditionMasking(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableConditionMasking(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify how the content should be de-identified.
      CharacterMaskConfig characterMaskConfig =
          CharacterMaskConfig.newBuilder().setMaskingCharacter("*").build();
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build();

      // Specify field to be de-identified.
      FieldId fieldId = FieldId.newBuilder().setName("HAPPINESS SCORE").build();

      // Specify when the above field should be de-identified.
      Condition condition =
          Condition.newBuilder()
              .setField(FieldId.newBuilder().setName("AGE").build())
              .setOperator(RelationalOperator.GREATER_THAN)
              .setValue(Value.newBuilder().setIntegerValue(89).build())
              .build();
      // Apply the condition to records
      RecordCondition recordCondition =
          RecordCondition.newBuilder()
              .setExpressions(
                  Expressions.newBuilder()
                      .setConditions(Conditions.newBuilder().addConditions(condition).build())
                      .build())
              .build();

      // Associate the de-identification and conditions with the specified field.
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .setPrimitiveTransformation(primitiveTransformation)
              .addFields(fieldId)
              .setCondition(recordCondition)
              .build();
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

API Explorer example

"deidentifyConfig":{
  "recordTransformations":{
    "fieldTransformations":[
      {
        "fields":[
          {
            "name":"HAPPINESS SCORE"
          }
        ],
        "primitiveTransformation":{
          "characterMaskConfig":{
            "maskingCharacter":"*"
          }
        },
        "condition":{
          "expressions":{
            "conditions":{
              "conditions":[
                {
                  "field":{
                    "name":"AGE"
                  },
                  "operator":"GREATER_THAN",
                  "value":{
                    "integerValue":"89"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

Transform findings found in columns

You can transform findings that make up either only part of a cell's content or all of it. In this example, all instances of PERSON_NAME are anonymized.

Input Transformed table
AGE PATIENT HAPPINESS SCORE FACTOID
101 Charles Dickens 95 Charles Dickens name was a curse, possibly invented by Shakespeare.
22 Jane Austen 21 There are 14 kisses in Jane Austen's novels.
55 Mark Twain 75 Mark Twain loved cats.
AGE PATIENT HAPPINESS SCORE FACTOID
101 [PERSON_NAME] 95 [PERSON_NAME] name was a curse, possibly invented by Shakespeare.
22 [PERSON_NAME] 21 There are 14 kisses in [PERSON_NAME] novels.
55 [PERSON_NAME] 75 [PERSON_NAME] loved cats.

Java

To learn how to install and use the client library for Cloud DLP, see the Cloud DLP Client Libraries.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.ReplaceWithInfoTypeConfig;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class DeIdentifyTableInfoTypes {

  public static void deIdentifyTableInfoTypes() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addHeaders(FieldId.newBuilder().setName("FACTOID").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue(
                                "Charles Dickens name was a curse invented by Shakespeare.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue("There are 14 kisses in Jane Austen's novels.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain loved cats.").build())
                    .build())
            .build();

    deIdentifyTableInfoTypes(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableInfoTypes(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify how the content should be de-identified.
      // Select type of info to be replaced.
      InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();
      // Specify that findings should be replaced with corresponding info type name.
      ReplaceWithInfoTypeConfig replaceWithInfoTypeConfig =
          ReplaceWithInfoTypeConfig.getDefaultInstance();
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setReplaceWithInfoTypeConfig(replaceWithInfoTypeConfig)
              .build();
      // Associate info type with the replacement strategy
      InfoTypeTransformation infoTypeTransformation =
          InfoTypeTransformation.newBuilder()
              .addInfoTypes(infoType)
              .setPrimitiveTransformation(primitiveTransformation)
              .build();
      InfoTypeTransformations infoTypeTransformations =
          InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformation).build();

      // Specify fields to be de-identified.
      List<FieldId> fieldIds =
          Stream.of("PATIENT", "FACTOID")
              .map(id -> FieldId.newBuilder().setName(id).build())
              .collect(Collectors.toList());

      // Associate the de-identification and conditions with the specified field.
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .setInfoTypeTransformations(infoTypeTransformations)
              .addAllFields(fieldIds)
              .build();
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

APIs Explorer example

"deidentifyConfig":{
  "recordTransformations":{
    "fieldTransformations":[
      {
        "infoTypeTransformations":{
          "transformations":[
            {
              "infoTypes":[
                {
                  "name":"PERSON_NAME"
                }
              ],
              "primitiveTransformation":{
                "replaceWithInfoTypeConfig":{

                }
              }
            }
          ]
        },
        "fields":[
          {
            "name":"PATIENT"
          },
          {
            "name":"FACTOID"
          }
        ]
      }
    ]
  }
}

Suppress a row based on the content of a column

You can remove a row entirely based on the content that appears in any column. This example suppresses the record for "Charles Dickens," as this patient is over 89 years old.

Input Transformed table
AGE PATIENT HAPPINESS SCORE
101 Charles Dickens 95
22 Jane Austen 21
55 Mark Twain 75
AGE PATIENT HAPPINESS SCORE
22 Jane Austen 21
55 Mark Twain 75

Java

To learn how to install and use the client library for Cloud DLP, see the Cloud DLP Client Libraries.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.RecordCondition;
import com.google.privacy.dlp.v2.RecordCondition.Condition;
import com.google.privacy.dlp.v2.RecordCondition.Conditions;
import com.google.privacy.dlp.v2.RecordCondition.Expressions;
import com.google.privacy.dlp.v2.RecordSuppression;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.RelationalOperator;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;

public class DeIdentifyTableRowSuppress {

  public static void deIdentifyTableRowSuppress() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .build())
            .build();

    deIdentifyTableRowSuppress(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableRowSuppress(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify when the content should be de-identified.
      Condition condition =
          Condition.newBuilder()
              .setField(FieldId.newBuilder().setName("AGE").build())
              .setOperator(RelationalOperator.GREATER_THAN)
              .setValue(Value.newBuilder().setIntegerValue(89).build())
              .build();
      // Apply the condition to record suppression.
      RecordSuppression recordSuppressions =
          RecordSuppression.newBuilder()
              .setCondition(
                  RecordCondition.newBuilder()
                      .setExpressions(
                          Expressions.newBuilder()
                              .setConditions(
                                  Conditions.newBuilder().addConditions(condition).build())
                              .build())
                      .build())
              .build();
      // Use record suppression as the only transformation
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addRecordSuppressions(recordSuppressions).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

APIs Explorer example

"deidentifyConfig":{
  "recordTransformations":{
    "recordSuppressions":[
      {
        "condition":{
          "expressions":{
            "conditions":{
              "conditions":[
                {
                  "field":{
                    "name":"AGE"
                  },
                  "operator":"GREATER_THAN",
                  "value":{
                    "integerValue":"89"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

Transform findings only when specific conditions are met on another field

In this example, the PERSON_NAME findings are only redacted if the "AGE" column indicates the patient is over the age of 89.

Input Transformed table
AGE PATIENT HAPPINESS SCORE FACTOID
101 Charles Dickens 95 Charles Dickens name was a curse, possibly invented by Shakespeare.
22 Jane Austen 21 There are 14 kisses in Jane Austen's novels.
55 Mark Twain 75 Mark Twain loved cats.
AGE PATIENT HAPPINESS SCORE FACTOID
101 [PERSON_NAME] 95 [PERSON_NAME] name was a curse, possibly invented by [PERSON_NAME].
22 Jane Austen 21 There are 14 kisses in Jane Austen's novels.
55 Mark Twain 75 Mark Twain loved cats.

Java

To learn how to install and use the client library for Cloud DLP, see the Cloud DLP Client Libraries.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordCondition;
import com.google.privacy.dlp.v2.RecordCondition.Condition;
import com.google.privacy.dlp.v2.RecordCondition.Conditions;
import com.google.privacy.dlp.v2.RecordCondition.Expressions;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.RelationalOperator;
import com.google.privacy.dlp.v2.ReplaceWithInfoTypeConfig;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class DeIdentifyTableConditionInfoTypes {

  public static void deIdentifyTableConditionInfoTypes() throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addHeaders(FieldId.newBuilder().setName("FACTOID").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue(
                                "Charles Dickens name was a curse invented by Shakespeare.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue("There are 14 kisses in Jane Austen's novels.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain loved cats.").build())
                    .build())
            .build();

    deIdentifyTableConditionInfoTypes(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableConditionInfoTypes(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify how the content should be de-identified.
      // Select type of info to be replaced.
      InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();
      // Specify that findings should be replaced with corresponding info type name.
      ReplaceWithInfoTypeConfig replaceWithInfoTypeConfig =
          ReplaceWithInfoTypeConfig.getDefaultInstance();
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setReplaceWithInfoTypeConfig(replaceWithInfoTypeConfig)
              .build();
      // Associate info type with the replacement strategy
      InfoTypeTransformation infoTypeTransformation =
          InfoTypeTransformation.newBuilder()
              .addInfoTypes(infoType)
              .setPrimitiveTransformation(primitiveTransformation)
              .build();
      InfoTypeTransformations infoTypeTransformations =
          InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformation).build();

      // Specify fields to be de-identified.
      List<FieldId> fieldIds =
          Stream.of("PATIENT", "FACTOID")
              .map(id -> FieldId.newBuilder().setName(id).build())
              .collect(Collectors.toList());

      // Specify when the above fields should be de-identified.
      Condition condition =
          Condition.newBuilder()
              .setField(FieldId.newBuilder().setName("AGE").build())
              .setOperator(RelationalOperator.GREATER_THAN)
              .setValue(Value.newBuilder().setIntegerValue(89).build())
              .build();
      // Apply the condition to records
      RecordCondition recordCondition =
          RecordCondition.newBuilder()
              .setExpressions(
                  Expressions.newBuilder()
                      .setConditions(Conditions.newBuilder().addConditions(condition).build())
                      .build())
              .build();

      // Associate the de-identification and conditions with the specified fields.
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .setInfoTypeTransformations(infoTypeTransformations)
              .addAllFields(fieldIds)
              .setCondition(recordCondition)
              .build();
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

APIs Explorer example

"deidentifyConfig":{
  "recordTransformations":{
    "fieldTransformations":[
      {
        "infoTypeTransformations":{
          "transformations":[
            {
              "infoTypes":[
                {
                  "name":"PERSON_NAME"
                }
              ],
              "primitiveTransformation":{
                "replaceWithInfoTypeConfig":{

                }
              }
            }
          ]
        },
        "fields":[
          {
            "name":"PATIENT"
          },
          {
            "name":"FACTOID"
          }
        ],
        "condition":{
          "expressions":{
            "conditions":{
              "conditions":[
                {
                  "field":{
                    "name":"AGE"
                  },
                  "operator":"GREATER_THAN",
                  "value":{
                    "integerValue":"89"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

Transform findings using a cryptographic hash transformation

The following JSON examples use infoType transformations to instruct the Cloud DLP API to inspect the entire table structure for specific infoTypes, and then to encrypt the matching values using a transient CryptoKey.

The following example demonstrates de-identifying two infoTypes using a cryptographic hash transformation.

Input:

userid comments
user1@example.org my email is user1@example.org and phone is 858-555-0222
user2@example.org my email is user2@example.org and phone is 858-555-0223
user3@example.org my email is user3@example.org and phone is 858-555-0224

Transformed table:

userid comments
1kSfj3Op64MH1BiznupEpX0BdQrHMm62X6abgsPH5zM= my email is 1kSfj3Op64MH1BiznupEpX0BdQrHMm62X6abgsPH5zM= and phone is hYXPcsJNBCe1rr51sHiVw2KhtoyMe4HEFKNHWFcDVm0=
4ESy7+rEN8NVaUJ6J7kwvcgW8wcm0cm5gbBAcu6SfdM= my email is 4ESy7+rEN8NVaUJ6J7kwvcgW8wcm0cm5gbBAcu6SfdM= and phone is KKqW1tQwgvGiC6iWJHhLiz2enNSEFRzhmLOf9fSTxRw=
bu1blyd/mbjLmpF2Rdi6zpgsLatSwpJLVki2fMeudM0= my email is bu1blyd/mbjLmpF2Rdi6zpgsLatSwpJLVki2fMeudM0= and phone is eNt7qtZVLmxRb8z8NBR/+z00In07CI3hEMStbwofWoc=

APIs Explorer example

{
  "inspectConfig":{
    "infoTypes":[
      {
        "name":"EMAIL_ADDRESS"
      },
      {
        "name":"PHONE_NUMBER"
      }
    ]
  },
  "deidentifyConfig":{
    "infoTypeTransformations":{
      "transformations":[
        {
          "infoTypes":[
            {
              "name":"EMAIL_ADDRESS"
            },
            {
              "name":"PHONE_NUMBER"
            }
          ],
          "primitiveTransformation":{
            "cryptoHashConfig":{
              "cryptoKey":{
                "transient":{
                  "name":"[TRANSIENT-CRYPTO-KEY]"
                }
              }
            }
          }
        }
      ]
    }
  },
  "item":{
    "table":{
      "headers":[
        {
          "name":"userid"
        },
        {
          "name":"comments"
        }
      ],
      "rows":[
        {
          "values":[
            {
              "stringValue":"abby_abernathy@example.org"
            },
            {
              "stringValue":"my email is abby_abernathy@example.org and phone is 858-555-0222"
            }
          ]
        },
        {
          "values":[
            {
              "stringValue":"bert_beauregard@example.org"
            },
            {
              "stringValue":"my email is bert_beauregard@example.org and phone is 858-555-0223"
            }
          ]
        },
        {
          "values":[
            {
              "stringValue":"cathy_crenshaw@example.org"
            },
            {
              "stringValue":"my email is cathy_crenshaw@example.org and phone is 858-555-0224"
            }
          ]
        }
      ]
    }
  }
}

Transform findings using two separate cryptographic hash transformations

This example demonstrates how you can use different cryptographic keys in different transformations within a single de-identification configuration. First, a field transformation on the "userid" field is declared. That transformation does not include any infoType transformations, so the "userid" field in each row is transformed, regardless of its data type. Then, another field transformation is declared, this one on the "comments" field.

Input:

userid comments
user1@example.org my email is user1@example.org and phone is 858-555-0222
abbyabernathy1 my userid is abbyabernathy1 and my email is aabernathy@example.com

Transformed table:

userid comments
5WvS4+aJtCCwWWG79cmRNamDgyvJ+CkuwNpA2gaR1VQ= my email is vjqGLaA6+NUUnZAWXpI72lU1GfwQdOKu7XqWaJPcvQQ= and phone is BY+mSXXTu6mOoX5pr0Xbse60uelsSHmwRCq6HcscKtk=
t0dOmHvkT0VsM++SVmESVKHenLkmhBmFezH3hSDldDg= my userid is abbyabernathy1 and my email is TQ3ancdUn9zgwO5qe6ahkmVrBuNhvlMknxjPjIt0N2w=

APIs Explorer example

{
  "inspectConfig":{
    "infoTypes":[
      {
        "name":"EMAIL_ADDRESS"
      },
      {
        "name":"PHONE_NUMBER"
      }
    ]
  },
  "deidentifyConfig":{
    "recordTransformations":{
      "fieldTransformations":[
        {
          "fields":[
            {
              "name":"userid"
            }
          ],
          "primitiveTransformation":{
            "cryptoHashConfig":{
              "cryptoKey":{
                "transient":{
                  "name":"[TRANSIENT-CRYPTO-KEY-1]"
                }
              }
            }
          }
        },
        {
          "fields":[
            {
              "name":"comments"
            }
          ],
          "infoTypeTransformations":{
            "transformations":[
              {
                "infoTypes":[
                  {
                    "name":"PHONE_NUMBER"
                  },
                  {
                    "name":"EMAIL_ADDRESS"
                  }
                ],
                "primitiveTransformation":{
                  "cryptoHashConfig":{
                    "cryptoKey":{
                      "transient":{
                        "name":"[TRANSIENT-CRYPTO-KEY-2]"
                      }
                    }
                  }
                }
              }
            ]
          }
        }
      ]
    }
  },
  "item":{
    "table":{
      "headers":[
        {
          "name":"userid"
        },
        {
          "name":"comments"
        }
      ],
      "rows":[
        {
          "values":[
            {
              "stringValue":"user1@example.org"
            },
            {
              "stringValue":"my email is user1@example.org and phone is 858-333-2222"
            }
          ]
        },
        {
          "values":[
            {
              "stringValue":"abbyabernathy1"
            },
            {
              "stringValue":"my userid is abbyabernathy1 and my email is aabernathy@example.com"
            }
          ]
        }
      ]
    }
  }
}