De-identify table data with infoTypes

Transform findings found in columns. You can transform findings that either make up part of a cell's content or all of it. In this example, all instances of PERSON_NAME are anonymized.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using System;
using System.Collections.Generic;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;

public class DeidentifyTableWithInfoTypes
{
    public static Table DeidentifyTable(
        string projectId,
        Table tableToInspect = null,
        IEnumerable<InfoType> infoTypes = null)
    {
        // Instantiate a client.
        var dlp = DlpServiceClient.Create();

        // Construct the table if null.
        if (tableToInspect == null)
        {
            var row1 = new Value[]
            {
                new Value { StringValue = "101" },
                new Value { StringValue = "Charles Dickens" },
                new Value { StringValue = "95" },
                new Value { StringValue = "Charles Dickens name was a curse invented by Shakespeare." }
            };
            var row2 = new Value[]
            {
                new Value { StringValue = "22" },
                new Value { StringValue = "Jane Austin" },
                new Value { StringValue = "21" },
                new Value { StringValue = "There are 14 kisses in Jane Austen's novels." }
            };
            var row3 = new Value[]
            {
                new Value { StringValue = "55" },
                new Value { StringValue = "Mark Twain" },
                new Value { StringValue = "75" },
                new Value { StringValue = "Mark Twain loved cats." }
            };

            tableToInspect = new Table
            {
                Headers =
                {
                    new FieldId { Name = "AGE" },
                    new FieldId { Name = "PATIENT" },
                    new FieldId { Name = "HAPPINESS SCORE" },
                    new FieldId { Name = "FACTOID" }
                },
                Rows =
                {
                    new Table.Types.Row { Values = { row1 } },
                    new Table.Types.Row { Values = { row2 } },
                    new Table.Types.Row { Values = { row3 } }
                }
            };
        }

        // Construct the table content item.
        var contentItem = new ContentItem { Table = tableToInspect };

        // Construct Replace With InfoTypes config to replace the match.
        var replaceInfoTypesConfig = new ReplaceWithInfoTypeConfig();

        // Construct Fields to be de-identified.
        var fieldIds = new FieldId[] { new FieldId { Name = "PATIENT" }, new FieldId { Name = "FACTOID" } };

        // Construct InfoType Transformation.
        var infoTypeTransformations = new InfoTypeTransformations
        {
            Transformations =
            {
                new InfoTypeTransformations.Types.InfoTypeTransformation
                {
                    PrimitiveTransformation = new PrimitiveTransformation
                    {
                        ReplaceWithInfoTypeConfig = replaceInfoTypesConfig
                    },
                    InfoTypes = { infoTypes ?? new InfoType[] { new InfoType { Name = "PERSON_NAME" } } }
                }
            }
        };

        // Construct the de-identify config using replace config.
        var deidentifyConfig = new DeidentifyConfig
        {
            RecordTransformations = new RecordTransformations
            {
                FieldTransformations =
                {
                    new FieldTransformation
                    {
                        InfoTypeTransformations = infoTypeTransformations,
                        Fields = { fieldIds }
                    }
                }
            }
        };

        // Construct the request.
        var request = new DeidentifyContentRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            DeidentifyConfig = deidentifyConfig,
            Item = contentItem
        };

        // Call the API.
        var response = dlp.DeidentifyContent(request);

        // Inspect the response.
        Console.WriteLine(response.Item.Table);
        return response.Item.Table;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// deidentifyTableInfotypes de-identifies table data with info types
func deidentifyTableInfotypes(w io.Writer, projectID string) error {
	// projectId := "your-project-id"

	row1 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "22"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Jane Austen"}},
			{Type: &dlppb.Value_StringValue{StringValue: "21"}},
			{Type: &dlppb.Value_StringValue{StringValue: "There are 14 kisses in Jane Austen's novels."}},
		},
	}

	row2 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "55"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Mark Twain"}},
			{Type: &dlppb.Value_StringValue{StringValue: "75"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Mark Twain loved cats."}},
		},
	}

	row3 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "101"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Charles Dickens"}},
			{Type: &dlppb.Value_StringValue{StringValue: "95"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Charles Dickens name was a curse invented by Shakespeare."}},
		},
	}

	table := &dlppb.Table{
		Headers: []*dlppb.FieldId{
			{Name: "AGE"},
			{Name: "PATIENT"},
			{Name: "HAPPINESS SCORE"},
			{Name: "FACTOID"},
		},
		Rows: []*dlppb.Table_Row{
			{Values: row1.Values},
			{Values: row2.Values},
			{Values: row3.Values},
		},
	}

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewRESTClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify what content you want the service to de-identify.
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Table{
			Table: table,
		},
	}

	// Specify how the content should be de-identified.
	// Select type of info to be replaced.
	infoTypes := []*dlppb.InfoType{
		{Name: "PERSON_NAME"},
	}

	// Specify that findings should be replaced with corresponding info type name.
	replaceWithInfoTypeConfig := &dlppb.ReplaceWithInfoTypeConfig{}
	primitiveTransformation := &dlppb.PrimitiveTransformation{
		Transformation: &dlppb.PrimitiveTransformation_ReplaceWithInfoTypeConfig{
			ReplaceWithInfoTypeConfig: replaceWithInfoTypeConfig,
		},
	}

	// Associate info type with the replacement strategy
	infoTypeTransformations := &dlppb.InfoTypeTransformations{
		Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
			{
				InfoTypes:               infoTypes,
				PrimitiveTransformation: primitiveTransformation,
			},
		},
	}

	// Specify fields to be de-identified.
	fields := []*dlppb.FieldId{
		{Name: "PATIENT"},
		{Name: "FACTOID"},
	}

	// Associate the de-identification and conditions with the specified field.
	fieldTransformation := &dlppb.FieldTransformation{
		Fields: fields,
		Transformation: &dlppb.FieldTransformation_InfoTypeTransformations{
			InfoTypeTransformations: infoTypeTransformations,
		},
	}

	recordTransformations := &dlppb.RecordTransformations{
		FieldTransformations: []*dlppb.FieldTransformation{
			fieldTransformation,
		},
	}

	// Construct the de-identification request to be sent by the client.
	req := &dlppb.DeidentifyContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		DeidentifyConfig: &dlppb.DeidentifyConfig{
			Transformation: &dlppb.DeidentifyConfig_RecordTransformations{
				RecordTransformations: recordTransformations,
			},
		},
		Item: contentItem,
	}

	// Send the request.
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return err
	}

	// Print the results.
	fmt.Fprintf(w, "Table after de-identification : %v", resp.GetItem().GetTable())
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.ReplaceWithInfoTypeConfig;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class DeIdentifyTableInfoTypes {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToDeIdentify =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("AGE").build())
            .addHeaders(FieldId.newBuilder().setName("PATIENT").build())
            .addHeaders(FieldId.newBuilder().setName("HAPPINESS SCORE").build())
            .addHeaders(FieldId.newBuilder().setName("FACTOID").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("101").build())
                    .addValues(Value.newBuilder().setStringValue("Charles Dickens").build())
                    .addValues(Value.newBuilder().setStringValue("95").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue(
                                "Charles Dickens name was a curse invented by Shakespeare.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("22").build())
                    .addValues(Value.newBuilder().setStringValue("Jane Austen").build())
                    .addValues(Value.newBuilder().setStringValue("21").build())
                    .addValues(
                        Value.newBuilder()
                            .setStringValue("There are 14 kisses in Jane Austen's novels.")
                            .build())
                    .build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("55").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain").build())
                    .addValues(Value.newBuilder().setStringValue("75").build())
                    .addValues(Value.newBuilder().setStringValue("Mark Twain loved cats.").build())
                    .build())
            .build();

    deIdentifyTableInfoTypes(projectId, tableToDeIdentify);
  }

  public static Table deIdentifyTableInfoTypes(String projectId, Table tableToDeIdentify)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify what content you want the service to de-identify.
      ContentItem contentItem = ContentItem.newBuilder().setTable(tableToDeIdentify).build();

      // Specify how the content should be de-identified.
      // Select type of info to be replaced.
      InfoType infoType = InfoType.newBuilder().setName("PERSON_NAME").build();
      // Specify that findings should be replaced with corresponding info type name.
      ReplaceWithInfoTypeConfig replaceWithInfoTypeConfig =
          ReplaceWithInfoTypeConfig.getDefaultInstance();
      PrimitiveTransformation primitiveTransformation =
          PrimitiveTransformation.newBuilder()
              .setReplaceWithInfoTypeConfig(replaceWithInfoTypeConfig)
              .build();
      // Associate info type with the replacement strategy
      InfoTypeTransformation infoTypeTransformation =
          InfoTypeTransformation.newBuilder()
              .addInfoTypes(infoType)
              .setPrimitiveTransformation(primitiveTransformation)
              .build();
      InfoTypeTransformations infoTypeTransformations =
          InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformation).build();

      // Specify fields to be de-identified.
      List<FieldId> fieldIds =
          Stream.of("PATIENT", "FACTOID")
              .map(id -> FieldId.newBuilder().setName(id).build())
              .collect(Collectors.toList());

      // Associate the de-identification and conditions with the specified field.
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .setInfoTypeTransformations(infoTypeTransformations)
              .addAllFields(fieldIds)
              .build();
      RecordTransformations transformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(transformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(contentItem)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service.
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Print the results.
      System.out.println("Table after de-identification: " + response.getItem().getTable());

      return response.getItem().getTable();
    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Initialize google DLP Client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// Construct the tabular data
const tablularData = {
  headers: [
    {name: 'AGE'},
    {name: 'PATIENT'},
    {name: 'HAPPINESS SCORE'},
    {name: 'FACTOID'},
  ],
  rows: [
    {
      values: [
        {integerValue: 101},
        {stringValue: 'Charles Dickens'},
        {integerValue: 95},
        {
          stringValue:
            'Charles Dickens name was a curse invented by Shakespeare.',
        },
      ],
    },
    {
      values: [
        {integerValue: 22},
        {stringValue: 'Jane Austen'},
        {integerValue: 21},
        {stringValue: "There are 14 kisses in Jane Austen's novels."},
      ],
    },
    {
      values: [
        {integerValue: 55},
        {stringValue: 'Mark Twain'},
        {integerValue: 75},
        {stringValue: 'Mark Twain loved cats.'},
      ],
    },
  ],
};

async function deIdentifyTableWithInfoType() {
  // Column that needs to be transformed
  const fieldIds = [{name: 'PATIENT'}, {name: 'FACTOID'}];

  // Construct InfoTypeTransformations configurations
  const infoTypeTransformations = {
    transformations: [
      {
        infoTypes: [{name: 'PERSON_NAME'}],
        primitiveTransformation: {
          replaceWithInfoTypeConfig: {},
        },
      },
    ],
  };

  // Combine configurations into a request for the service.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    item: {
      table: tablularData,
    },
    deidentifyConfig: {
      recordTransformations: {
        fieldTransformations: [
          {
            infoTypeTransformations,
            fields: fieldIds,
          },
        ],
      },
    },
  };
  // Send the request and receive response from the service
  const [response] = await dlp.deidentifyContent(request);

  // Print the results
  console.log(
    `Table after de-identification: ${JSON.stringify(response.item.table)}`
  );
}

deIdentifyTableWithInfoType();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\FieldId;
use Google\Cloud\Dlp\V2\FieldTransformation;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InfoTypeTransformations;
use Google\Cloud\Dlp\V2\InfoTypeTransformations\InfoTypeTransformation;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
use Google\Cloud\Dlp\V2\RecordTransformations;
use Google\Cloud\Dlp\V2\ReplaceWithInfoTypeConfig;
use Google\Cloud\Dlp\V2\Table;
use Google\Cloud\Dlp\V2\Table\Row;
use Google\Cloud\Dlp\V2\Value;

/**
 * De-identify table data with infoTypes
 *
 * @param string $callingProjectId      The Google Cloud project id to use as a parent resource.
 * @param string $inputCsvFile          The input file(csv) path  to deidentify
 * @param string $outputCsvFile         The oupt file path to save deidentify content
 */

function deidentify_table_infotypes(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $inputCsvFile = './test/data/table1.csv',
    string $outputCsvFile = './test/data/deidentify_table_infotypes_output.csv'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Read a CSV file
    $csvLines = file($inputCsvFile, FILE_IGNORE_NEW_LINES);
    $csvHeaders = explode(',', $csvLines[0]);
    $csvRows = array_slice($csvLines, 1);

    // Convert CSV file into protobuf objects
    $tableHeaders = array_map(function ($csvHeader) {
        return (new FieldId)
            ->setName($csvHeader);
    }, $csvHeaders);

    $tableRows = array_map(function ($csvRow) {
        $rowValues = array_map(function ($csvValue) {
            return (new Value())
                ->setStringValue($csvValue);
        }, explode(',', $csvRow));
        return (new Row())
            ->setValues($rowValues);
    }, $csvRows);

    // Construct the table object
    $tableToDeIdentify = (new Table())
        ->setHeaders($tableHeaders)
        ->setRows($tableRows);

    // Specify the content to be inspected.
    $content = (new ContentItem())
        ->setTable($tableToDeIdentify);

    // Specify the type of info the inspection will look for.
    $personNameInfoType = (new InfoType())
        ->setName('PERSON_NAME');

    // Specify that findings should be replaced with corresponding info type name.
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setReplaceWithInfoTypeConfig(new ReplaceWithInfoTypeConfig());

    // Associate info type with the replacement strategy
    $infoTypeTransformation = (new InfoTypeTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setInfoTypes([$personNameInfoType]);

    $infoTypeTransformations = (new InfoTypeTransformations())
        ->setTransformations([$infoTypeTransformation]);

    // Specify fields to be de-identified.
    $fieldIds = [
        (new FieldId())->setName('PATIENT'),
        (new FieldId())->setName('FACTOID'),
    ];

    // Associate the de-identification and transformation with the specified fields.
    $fieldTransformation = (new FieldTransformation())
        ->setInfoTypeTransformations($infoTypeTransformations)
        ->setFields($fieldIds);

    $recordtransformations = (new RecordTransformations())
        ->setFieldTransformations([$fieldTransformation]);

    $deidentifyConfig = (new DeidentifyConfig())
        ->setRecordTransformations($recordtransformations);

    // Run request
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($content);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Print the results
    $csvRef = fopen($outputCsvFile, 'w');
    fputcsv($csvRef, $csvHeaders);
    foreach ($response->getItem()->getTable()->getRows() as $tableRow) {
        $values = array_map(function ($tableValue) {
            return $tableValue->getStringValue();
        }, iterator_to_array($tableRow->getValues()));
        fputcsv($csvRef, $values);
    };
    printf('After de-identify the table data (Output File Location): %s', $outputCsvFile);
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from typing import Dict, List, Union

import google.cloud.dlp


def deidentify_table_replace_with_info_types(
    project: str,
    table_data: Dict[str, Union[List[str], List[List[str]]]],
    info_types: List[str],
    deid_content_list: List[str],
) -> None:
    """ Uses the Data Loss Prevention API to de-identify sensitive data in a
      table by replacing them with info type.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_data: Json string representing table data.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        deid_content_list: A list of fields in table to de-identify

    Returns:
        None; the response from the API is printed to the terminal.

    Example:
    >> $ python deidentify_table_infotypes.py \
    '{
        "header": ["name", "email", "phone number"],
        "rows": [
            ["Robert Frost", "robertfrost@example.com", "4232342345"],
            ["John Doe", "johndoe@example.com", "4253458383"]
        ]
    }' \
    ["PERSON_NAME"] ["name"]
    >> '{
            "header": ["name", "email", "phone number"],
            "rows": [
                ["[PERSON_NAME]", "robertfrost@example.com", "4232342345"],
                ["[PERSON_NAME]", "johndoe@example.com", "4253458383"]
            ]
        }'
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": val} for val in table_data["header"]]
    rows = []
    for row in table_data["rows"]:
        rows.append({"values": [{"string_value": cell_val} for cell_val in row]})

    table = {"headers": headers, "rows": rows}

    # Construct item
    item = {"table": table}

    # Specify fields to be de-identified
    deid_content_list = [{"name": _i} for _i in deid_content_list]

    # Construct inspect configuration dictionary
    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "info_type_transformations": {
                        "transformations": [
                            {
                                "primitive_transformation": {
                                    "replace_with_info_type_config": {}
                                }
                            }
                        ]
                    },
                    "fields": deid_content_list,
                }
            ]
        }
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Call the API.
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": item,
            "inspect_config": inspect_config,
        }
    )

    # Print the result
    print(f"Table after de-identification: {response.item.table}")

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.