Demonstrates how to de-identify a CSV file by date shifting: each date in the specified fields is shifted by a random number of days chosen between a configured lower and upper bound.
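Before the per-language samples, here is the core idea: date shifting moves every matched date by a random number of days chosen between a lower and an upper bound, and, when a context field plus a KMS-wrapped key are supplied, the shift is derived from that key and the row's context value so related rows stay consistent. The following is only a minimal Python sketch of that idea, not a call to the DLP API; the shift_date helper and the demo key are hypothetical.

import hashlib
import random
from datetime import date, timedelta
from typing import Optional


def shift_date(value: date, lower_bound_days: int, upper_bound_days: int,
               context: Optional[str] = None, key: bytes = b"demo-key") -> date:
    """Hypothetical illustration of date shifting (not the DLP API).

    Without a context, the offset is random per call; with a context, it is
    derived from a keyed hash so every row sharing that context shifts by
    the same amount.
    """
    if context is None:
        offset = random.randint(lower_bound_days, upper_bound_days)
    else:
        digest = hashlib.sha256(key + context.encode("utf-8")).digest()
        span = upper_bound_days - lower_bound_days + 1
        offset = lower_bound_days + int.from_bytes(digest[:4], "big") % span
    return value + timedelta(days=offset)


# Shift a date by between -5 and +5 days, consistently for context "user-42".
print(shift_date(date(2016, 1, 10), -5, 5, context="user-42"))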
Code sample
C#
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
using System;
using System.IO;
using System.Linq;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using Google.Protobuf;
public class DeidentifyWithDateShift
{
public static DeidentifyContentResponse Deidentify(
string projectId,
string inputCsvFilePath,
int lowerBoundDays,
int upperBoundDays,
string dateFields,
string contextField,
string keyName,
string wrappedKey)
{
var hasKeyName = !string.IsNullOrEmpty(keyName);
var hasWrappedKey = !string.IsNullOrEmpty(wrappedKey);
var hasContext = !string.IsNullOrEmpty(contextField);
bool allFieldsSet = hasKeyName && hasWrappedKey && hasContext;
bool noFieldsSet = !hasKeyName && !hasWrappedKey && !hasContext;
if (!(allFieldsSet || noFieldsSet))
{
throw new ArgumentException("Must specify ALL or NONE of: {contextFieldId, keyName, wrappedKey}!");
}
var dlp = DlpServiceClient.Create();
// Read file
var csvLines = File.ReadAllLines(inputCsvFilePath);
var csvHeaders = csvLines[0].Split(',');
var csvRows = csvLines.Skip(1).ToArray();
// Convert dates to protobuf format, and everything else to a string
var protoHeaders = csvHeaders.Select(header => new FieldId { Name = header });
var protoRows = csvRows.Select(csvRow =>
{
var rowValues = csvRow.Split(',');
var protoValues = rowValues.Select(rowValue =>
System.DateTime.TryParse(rowValue, out var parsedDate)
? new Value { DateValue = Google.Type.Date.FromDateTime(parsedDate) }
: new Value { StringValue = rowValue });
var rowObject = new Table.Types.Row();
rowObject.Values.Add(protoValues);
return rowObject;
});
var dateFieldList = dateFields
.Split(',')
.Select(field => new FieldId { Name = field });
// Construct + execute the request
var dateShiftConfig = new DateShiftConfig
{
LowerBoundDays = lowerBoundDays,
UpperBoundDays = upperBoundDays
};
// Attach the context field and KMS-wrapped key only when all three optional
// values are provided; otherwise the service picks a random shift per row.
if (allFieldsSet)
{
    dateShiftConfig.Context = new FieldId { Name = contextField };
    dateShiftConfig.CryptoKey = new CryptoKey
    {
        KmsWrapped = new KmsWrappedCryptoKey
        {
            WrappedKey = ByteString.FromBase64(wrappedKey),
            CryptoKeyName = keyName
        }
    };
}
var deidConfig = new DeidentifyConfig
{
RecordTransformations = new RecordTransformations
{
FieldTransformations =
{
new FieldTransformation
{
PrimitiveTransformation = new PrimitiveTransformation
{
DateShiftConfig = dateShiftConfig
},
Fields = { dateFieldList }
}
}
}
};
var response = dlp.DeidentifyContent(
new DeidentifyContentRequest
{
Parent = new LocationName(projectId, "global").ToString(),
DeidentifyConfig = deidConfig,
Item = new ContentItem
{
Table = new Table
{
Headers = { protoHeaders },
Rows = { protoRows }
}
}
});
return response;
}
}
Go
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// deidentifyDateShift de-identifies dates in the input string by shifting them a
// random number of days between lowerBoundDays and upperBoundDays.
func deidentifyDateShift(w io.Writer, projectID string, lowerBoundDays, upperBoundDays int32, input string) error {
// projectID := "my-project-id"
// lowerBoundDays := -1
// upperBoundDays := -1
// input := "2016-01-10"
// Will print "2016-01-09"
ctx := context.Background()
client, err := dlp.NewClient(ctx)
if err != nil {
return fmt.Errorf("dlp.NewClient: %w", err)
}
defer client.Close()
// Create a configured request.
req := &dlppb.DeidentifyContentRequest{
Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
DeidentifyConfig: &dlppb.DeidentifyConfig{
Transformation: &dlppb.DeidentifyConfig_InfoTypeTransformations{
InfoTypeTransformations: &dlppb.InfoTypeTransformations{
Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
{
InfoTypes: []*dlppb.InfoType{}, // Match all info types.
PrimitiveTransformation: &dlppb.PrimitiveTransformation{
Transformation: &dlppb.PrimitiveTransformation_DateShiftConfig{
DateShiftConfig: &dlppb.DateShiftConfig{
LowerBoundDays: lowerBoundDays,
UpperBoundDays: upperBoundDays,
},
},
},
},
},
},
},
},
// The InspectConfig is used to identify the DATE fields.
InspectConfig: &dlppb.InspectConfig{
InfoTypes: []*dlppb.InfoType{
{
Name: "DATE",
},
},
},
// The item to analyze.
Item: &dlppb.ContentItem{
DataItem: &dlppb.ContentItem_Value{
Value: input,
},
},
}
// Send the request.
r, err := client.DeidentifyContent(ctx, req)
if err != nil {
return fmt.Errorf("DeidentifyContent: %w", err)
}
// Print the result.
fmt.Fprint(w, r.GetItem().GetValue())
return nil
}
Java
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.common.base.Splitter;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DateShiftConfig;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Value;
import com.google.type.Date;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public class DeIdentifyWithDateShift {
public static void main(String[] args) throws Exception {
// TODO(developer): Replace these variables before running the sample.
String projectId = "your-project-id";
Path inputCsvFile = Paths.get("path/to/your/input/file.csv");
Path outputCsvFile = Paths.get("path/to/your/output/file.csv");
deIdentifyWithDateShift(projectId, inputCsvFile, outputCsvFile);
}
public static void deIdentifyWithDateShift(
String projectId, Path inputCsvFile, Path outputCsvFile) throws IOException {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (DlpServiceClient dlp = DlpServiceClient.create()) {
// Read the contents of the CSV file into a Table
List<FieldId> headers;
List<Table.Row> rows;
try (BufferedReader input = Files.newBufferedReader(inputCsvFile)) {
// Parse and convert the first line into header names
headers =
Arrays.stream(input.readLine().split(","))
.map(header -> FieldId.newBuilder().setName(header).build())
.collect(Collectors.toList());
// Parse the remainder of the file as Table.Rows
rows =
input.lines().map(DeIdentifyWithDateShift::parseLineAsRow).collect(Collectors.toList());
}
Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build();
ContentItem item = ContentItem.newBuilder().setTable(table).build();
// Set the maximum number of days to shift dates backward (lower bound) or forward (upper bound)
DateShiftConfig dateShiftConfig =
DateShiftConfig.newBuilder().setLowerBoundDays(5).setUpperBoundDays(5).build();
PrimitiveTransformation transformation =
PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build();
// Specify which fields the DateShift should apply to
List<FieldId> dateFields = Arrays.asList(headers.get(1), headers.get(3));
FieldTransformation fieldTransformation =
FieldTransformation.newBuilder()
.addAllFields(dateFields)
.setPrimitiveTransformation(transformation)
.build();
RecordTransformations recordTransformations =
RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();
// Specify the config for the de-identify request
DeidentifyConfig deidentifyConfig =
DeidentifyConfig.newBuilder().setRecordTransformations(recordTransformations).build();
// Combine configurations into a request for the service.
DeidentifyContentRequest request =
DeidentifyContentRequest.newBuilder()
.setParent(LocationName.of(projectId, "global").toString())
.setItem(item)
.setDeidentifyConfig(deidentifyConfig)
.build();
// Send the request and receive response from the service
DeidentifyContentResponse response = dlp.deidentifyContent(request);
// Write the results to the target CSV file
try (BufferedWriter writer = Files.newBufferedWriter(outputCsvFile)) {
Table outTable = response.getItem().getTable();
String headerOut =
outTable.getHeadersList().stream()
.map(FieldId::getName)
.collect(Collectors.joining(","));
writer.write(headerOut + "\n");
List<String> rowOutput =
outTable.getRowsList().stream()
.map(row -> joinRow(row.getValuesList()))
.collect(Collectors.toList());
for (String line : rowOutput) {
writer.write(line + "\n");
}
System.out.println("Content written to file: " + outputCsvFile.toString());
}
}
}
// Convert the string from the csv file into com.google.type.Date
public static Date parseAsDate(String s) {
LocalDate date = LocalDate.parse(s, DateTimeFormatter.ofPattern("MM/dd/yyyy"));
return Date.newBuilder()
.setDay(date.getDayOfMonth())
.setMonth(date.getMonthValue())
.setYear(date.getYear())
.build();
}
// Each row is in the format: Name,BirthDate,CreditCardNumber,RegisterDate
public static Table.Row parseLineAsRow(String line) {
List<String> values = Splitter.on(",").splitToList(line);
Value name = Value.newBuilder().setStringValue(values.get(0)).build();
Value birthDate = Value.newBuilder().setDateValue(parseAsDate(values.get(1))).build();
Value creditCardNumber = Value.newBuilder().setStringValue(values.get(2)).build();
Value registerDate = Value.newBuilder().setDateValue(parseAsDate(values.get(3))).build();
return Table.Row.newBuilder()
.addValues(name)
.addValues(birthDate)
.addValues(creditCardNumber)
.addValues(registerDate)
.build();
}
public static String formatDate(Date d) {
return String.format("%s/%s/%s", d.getMonth(), d.getDay(), d.getYear());
}
public static String joinRow(List<Value> values) {
String name = values.get(0).getStringValue();
String birthDate = formatDate(values.get(1).getDateValue());
String creditCardNumber = values.get(2).getStringValue();
String registerDate = formatDate(values.get(3).getDateValue());
return String.join(",", name, birthDate, creditCardNumber, registerDate);
}
}
Node.js
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');
// Instantiates a client
const dlp = new DLP.DlpServiceClient();
// Import other required libraries
const fs = require('fs');
// The project ID to run the API call under
// const projectId = 'my-project';
// The path to the CSV file to deidentify
// The first row of the file must specify column names, and all other rows
// must contain valid values
// const inputCsvFile = '/path/to/input/file.csv';
// The path to save the date-shifted CSV file to
// const outputCsvFile = '/path/to/output/file.csv';
// The list of (date) fields in the CSV file to date shift
// const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }];
// The maximum number of days to shift a date backward
// const lowerBoundDays = 1;
// The maximum number of days to shift a date forward
// const upperBoundDays = 1;
// (Optional) The name of the column used to determine the date shift amount
// If this is not specified, a random shift amount will be used for every row
// If this is specified, then 'wrappedKey' and 'keyName' must also be set
// const contextFieldId = 'user_id';
// (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
// If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set
// const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';
// (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
// This key should be encrypted using the Cloud KMS key specified above
// If this is specified, then 'keyName' and 'contextFieldId' must also be set
// const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY'
// Helper function for converting CSV rows to Protobuf types
const rowToProto = row => {
const values = row.split(',');
const convertedValues = values.map(value => {
if (Date.parse(value)) {
const date = new Date(value);
return {
dateValue: {
year: date.getFullYear(),
month: date.getMonth() + 1,
day: date.getDate(),
},
};
} else {
// Convert all non-date values to strings
return {stringValue: value.toString()};
}
});
return {values: convertedValues};
};
async function deidentifyWithDateShift() {
// Read and parse a CSV file
const csvLines = fs
.readFileSync(inputCsvFile)
.toString()
.split('\n')
.filter(line => line.includes(','));
const csvHeaders = csvLines[0].split(',');
const csvRows = csvLines.slice(1);
// Construct the table object
const tableItem = {
table: {
headers: csvHeaders.map(header => {
return {name: header};
}),
rows: csvRows.map(row => rowToProto(row)),
},
};
// Construct DateShiftConfig
const dateShiftConfig = {
lowerBoundDays: lowerBoundDays,
upperBoundDays: upperBoundDays,
};
if (contextFieldId && keyName && wrappedKey) {
dateShiftConfig.context = {name: contextFieldId};
dateShiftConfig.cryptoKey = {
kmsWrapped: {
wrappedKey: wrappedKey,
cryptoKeyName: keyName,
},
};
} else if (contextFieldId || keyName || wrappedKey) {
throw new Error(
'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!'
);
}
// Construct deidentification request
const request = {
parent: `projects/${projectId}/locations/global`,
deidentifyConfig: {
recordTransformations: {
fieldTransformations: [
{
fields: dateFields,
primitiveTransformation: {
dateShiftConfig: dateShiftConfig,
},
},
],
},
},
item: tableItem,
};
// Run deidentification request
const [response] = await dlp.deidentifyContent(request);
const tableRows = response.item.table.rows;
// Write results to a CSV file
tableRows.forEach((row, rowIndex) => {
const rowValues = row.values.map(
value =>
value.stringValue ||
`${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}`
);
csvLines[rowIndex + 1] = rowValues.join(',');
});
csvLines.push('');
fs.writeFileSync(outputCsvFile, csvLines.join('\n'));
// Print status
console.log(`Successfully saved date-shift output to ${outputCsvFile}`);
}
deidentifyWithDateShift();
PHP
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
use DateTime;
use Exception;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CryptoKey;
use Google\Cloud\Dlp\V2\DateShiftConfig;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\FieldId;
use Google\Cloud\Dlp\V2\FieldTransformation;
use Google\Cloud\Dlp\V2\KmsWrappedCryptoKey;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
use Google\Cloud\Dlp\V2\RecordTransformations;
use Google\Cloud\Dlp\V2\Table;
use Google\Cloud\Dlp\V2\Table\Row;
use Google\Cloud\Dlp\V2\Value;
use Google\Type\Date;
/**
* Deidentify dates in a CSV file by pseudorandomly shifting them.
* If contextFieldName is not specified, a random shift amount will be used for every row.
* If contextFieldName is specified, then 'wrappedKey' and 'keyName' must also be set.
*
* @param string $callingProjectId The GCP Project ID to run the API call under
* @param string $inputCsvFile The path to the CSV file to deidentify
* @param string $outputCsvFile The path to save the date-shifted CSV file to
* @param string $dateFieldNames The comma-separated list of (date) fields in the CSV file to date shift
* @param int $lowerBoundDays The maximum number of days to shift a date backward
* @param int $upperBoundDays The maximum number of days to shift a date forward
* @param string $contextFieldName (Optional) The column to determine date shift amount based on
 * @param string $keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
 * @param string $wrappedKey (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
*/
function deidentify_dates(
string $callingProjectId,
string $inputCsvFile,
string $outputCsvFile,
string $dateFieldNames,
int $lowerBoundDays,
int $upperBoundDays,
string $contextFieldName = '',
string $keyName = '',
string $wrappedKey = ''
): void {
// Instantiate a client.
$dlp = new DlpServiceClient();
// Read a CSV file
$csvLines = file($inputCsvFile, FILE_IGNORE_NEW_LINES);
$csvHeaders = explode(',', $csvLines[0]);
$csvRows = array_slice($csvLines, 1);
// Convert CSV file into protobuf objects
$tableHeaders = array_map(function ($csvHeader) {
return (new FieldId)->setName($csvHeader);
}, $csvHeaders);
$tableRows = array_map(function ($csvRow) {
$rowValues = array_map(function ($csvValue) {
if ($csvDate = DateTime::createFromFormat('m/d/Y', $csvValue)) {
$date = (new Date())
->setYear((int) $csvDate->format('Y'))
->setMonth((int) $csvDate->format('m'))
->setDay((int) $csvDate->format('d'));
return (new Value())
->setDateValue($date);
} else {
return (new Value())
->setStringValue($csvValue);
}
}, explode(',', $csvRow));
return (new Row())
->setValues($rowValues);
}, $csvRows);
// Convert date fields into protobuf objects
$dateFields = array_map(function ($dateFieldName) {
return (new FieldId())->setName($dateFieldName);
}, explode(',', $dateFieldNames));
// Construct the table object
$table = (new Table())
->setHeaders($tableHeaders)
->setRows($tableRows);
$item = (new ContentItem())
->setTable($table);
// Construct dateShiftConfig
$dateShiftConfig = (new DateShiftConfig())
->setLowerBoundDays($lowerBoundDays)
->setUpperBoundDays($upperBoundDays);
if ($contextFieldName && $keyName && $wrappedKey) {
$contextField = (new FieldId())
->setName($contextFieldName);
// Create the wrapped crypto key configuration object
$kmsWrappedCryptoKey = (new KmsWrappedCryptoKey())
->setWrappedKey(base64_decode($wrappedKey))
->setCryptoKeyName($keyName);
$cryptoKey = (new CryptoKey())
->setKmsWrapped($kmsWrappedCryptoKey);
$dateShiftConfig
->setContext($contextField)
->setCryptoKey($cryptoKey);
} elseif ($contextFieldName || $keyName || $wrappedKey) {
throw new Exception('You must set either ALL or NONE of {$contextFieldName, $keyName, $wrappedKey}!');
}
// Create the information transform configuration objects
$primitiveTransformation = (new PrimitiveTransformation())
->setDateShiftConfig($dateShiftConfig);
$fieldTransformation = (new FieldTransformation())
->setPrimitiveTransformation($primitiveTransformation)
->setFields($dateFields);
$recordTransformations = (new RecordTransformations())
->setFieldTransformations([$fieldTransformation]);
// Create the deidentification configuration object
$deidentifyConfig = (new DeidentifyConfig())
->setRecordTransformations($recordTransformations);
$parent = "projects/$callingProjectId/locations/global";
// Run request
$deidentifyContentRequest = (new DeidentifyContentRequest())
->setParent($parent)
->setDeidentifyConfig($deidentifyConfig)
->setItem($item);
$response = $dlp->deidentifyContent($deidentifyContentRequest);
// Check for errors
foreach ($response->getOverview()->getTransformationSummaries() as $summary) {
foreach ($summary->getResults() as $result) {
if ($details = $result->getDetails()) {
printf('Error: %s' . PHP_EOL, $details);
return;
}
}
}
// Save the results to a file
$csvRef = fopen($outputCsvFile, 'w');
fputcsv($csvRef, $csvHeaders);
foreach ($response->getItem()->getTable()->getRows() as $tableRow) {
$values = array_map(function ($tableValue) {
if ($tableValue->getStringValue()) {
return $tableValue->getStringValue();
}
$protoDate = $tableValue->getDateValue();
$date = mktime(0, 0, 0, $protoDate->getMonth(), $protoDate->getDay(), $protoDate->getYear());
            // date() is used instead of the deprecated strftime(); the format matches the input (m/d/Y)
            return date('m/d/Y', $date);
}, iterator_to_array($tableRow->getValues()));
fputcsv($csvRef, $values);
    }
fclose($csvRef);
printf('Deidentified dates written to %s' . PHP_EOL, $outputCsvFile);
}
Python
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import base64
import csv
from datetime import datetime
from typing import List
import google.cloud.dlp
from google.cloud.dlp_v2 import types
def deidentify_with_date_shift(
project: str,
input_csv_file: str = None,
output_csv_file: str = None,
date_fields: List[str] = None,
lower_bound_days: int = None,
upper_bound_days: int = None,
context_field_id: str = None,
wrapped_key: str = None,
key_name: str = None,
) -> None:
"""Uses the Data Loss Prevention API to deidentify dates in a CSV file by
pseudorandomly shifting them.
Args:
project: The Google Cloud project id to use as a parent resource.
input_csv_file: The path to the CSV file to deidentify. The first row
of the file must specify column names, and all other rows must
contain valid values.
output_csv_file: The path to save the date-shifted CSV file.
date_fields: The list of (date) fields in the CSV file to date shift.
Example: ['birth_date', 'register_date']
lower_bound_days: The maximum number of days to shift a date backward
upper_bound_days: The maximum number of days to shift a date forward
        context_field_id: (Optional) The name of the column used to determine
            the date shift amount. If this is not specified, a random shift
            amount will be used for every row. If this is specified, then
            'wrapped_key' and 'key_name' must also be set. Example:
            context_field_id = 'user_id'
key_name: (Optional) The name of the Cloud KMS key used to encrypt
('wrap') the AES-256 key. Example:
key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
wrapped_key: (Optional) The encrypted ('wrapped') AES-256 key to use.
This key should be encrypted using the Cloud KMS key specified by
key_name.
Returns:
        None; the date-shifted table is written to output_csv_file and a
        status message is printed to the terminal.
"""
# Instantiate a client
dlp = google.cloud.dlp_v2.DlpServiceClient()
# Convert the project id into a full resource id.
parent = f"projects/{project}/locations/global"
# Convert date field list to Protobuf type
def map_fields(field: str) -> dict:
return {"name": field}
if date_fields:
date_fields = map(map_fields, date_fields)
else:
date_fields = []
f = []
with open(input_csv_file) as csvfile:
reader = csv.reader(csvfile)
for row in reader:
f.append(row)
# Helper function for converting CSV rows to Protobuf types
def map_headers(header: str) -> dict:
return {"name": header}
def map_data(value: str) -> dict:
try:
date = datetime.strptime(value, "%m/%d/%Y")
return {
"date_value": {"year": date.year, "month": date.month, "day": date.day}
}
except ValueError:
return {"string_value": value}
    def map_rows(row: List[str]) -> dict:
return {"values": map(map_data, row)}
# Using the helper functions, convert CSV rows to protobuf-compatible
# dictionaries.
csv_headers = map(map_headers, f[0])
csv_rows = map(map_rows, f[1:])
# Construct the table dict
table_item = {"table": {"headers": csv_headers, "rows": csv_rows}}
# Construct date shift config
date_shift_config = {
"lower_bound_days": lower_bound_days,
"upper_bound_days": upper_bound_days,
}
# If using a Cloud KMS key, add it to the date_shift_config.
# The wrapped key is base64-encoded, but the library expects a binary
# string, so decode it here.
if context_field_id and key_name and wrapped_key:
date_shift_config["context"] = {"name": context_field_id}
date_shift_config["crypto_key"] = {
"kms_wrapped": {
"wrapped_key": base64.b64decode(wrapped_key),
"crypto_key_name": key_name,
}
}
elif context_field_id or key_name or wrapped_key:
raise ValueError(
"""You must set either ALL or NONE of
[context_field_id, key_name, wrapped_key]!"""
)
# Construct Deidentify Config
deidentify_config = {
"record_transformations": {
"field_transformations": [
{
"fields": date_fields,
"primitive_transformation": {
"date_shift_config": date_shift_config
},
}
]
}
}
# Write to CSV helper methods
def write_header(header: types.storage.FieldId) -> str:
return header.name
def write_data(data: types.storage.Value) -> str:
return data.string_value or "{}/{}/{}".format(
data.date_value.month,
data.date_value.day,
data.date_value.year,
)
# Call the API
response = dlp.deidentify_content(
request={
"parent": parent,
"deidentify_config": deidentify_config,
"item": table_item,
}
)
# Write results to CSV file
with open(output_csv_file, "w") as csvfile:
write_file = csv.writer(csvfile, delimiter=",")
write_file.writerow(map(write_header, response.item.table.headers))
for row in response.item.table.rows:
write_file.writerow(map(write_data, row.values))
# Print status
print(f"Successfully saved date-shift output to {output_csv_file}")
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.