展示如何通过日期偏移(date shifting)对 CSV 文件中的日期字段进行去标识化处理。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
C#
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
using System;
using System.IO;
using System.Linq;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using Google.Protobuf;
/// <summary>
/// Sample that de-identifies the date columns of a CSV file by sending the file to the
/// DLP API as a table with a date-shift field transformation.
/// </summary>
public class DeidentifyWithDateShift
{
    /// <summary>
    /// Reads a CSV file, converts it to a DLP table and asks the service to date-shift the
    /// requested columns.
    /// </summary>
    /// <param name="projectId">Project used to build the request parent (location "global").</param>
    /// <param name="inputCsvFilePath">Path of the CSV file; the first line is used as the header row.</param>
    /// <param name="lowerBoundDays">Value assigned to <c>DateShiftConfig.LowerBoundDays</c>.</param>
    /// <param name="upperBoundDays">Value assigned to <c>DateShiftConfig.UpperBoundDays</c>.</param>
    /// <param name="dateFields">Comma-separated names of the columns to date-shift.</param>
    /// <param name="contextField">Optional name of the column used as the date-shift context.</param>
    /// <param name="keyName">Optional Cloud KMS crypto key name for the wrapped key.</param>
    /// <param name="wrappedKey">Optional base64-encoded wrapped key.</param>
    /// <returns>The response returned by <c>DeidentifyContent</c>.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when only some of <paramref name="contextField"/>, <paramref name="keyName"/> and
    /// <paramref name="wrappedKey"/> are supplied.
    /// </exception>
    public static DeidentifyContentResponse Deidentify(
        string projectId,
        string inputCsvFilePath,
        int lowerBoundDays,
        int upperBoundDays,
        string dateFields,
        string contextField,
        string keyName,
        string wrappedKey)
    {
        // The context field and the wrapped key are an all-or-nothing group.
        var hasKeyName = !string.IsNullOrEmpty(keyName);
        var hasWrappedKey = !string.IsNullOrEmpty(wrappedKey);
        var hasContext = !string.IsNullOrEmpty(contextField);
        bool allFieldsSet = hasKeyName && hasWrappedKey && hasContext;
        bool noFieldsSet = !hasKeyName && !hasWrappedKey && !hasContext;
        if (!(allFieldsSet || noFieldsSet))
        {
            throw new ArgumentException("Must specify ALL or NONE of: {contextFieldId, keyName, wrappedKey}!");
        }

        var dlp = DlpServiceClient.Create();

        // Read file
        var csvLines = File.ReadAllLines(inputCsvFilePath);
        var csvHeaders = csvLines[0].Split(',');
        var csvRows = csvLines.Skip(1).ToArray();

        // Convert dates to protobuf format, and everything else to a string
        var protoHeaders = csvHeaders.Select(header => new FieldId { Name = header });
        var protoRows = csvRows.Select(csvRow =>
        {
            var rowValues = csvRow.Split(',');
            var protoValues = rowValues.Select(rowValue =>
                System.DateTime.TryParse(rowValue, out var parsedDate)
                    ? new Value { DateValue = Google.Type.Date.FromDateTime(parsedDate) }
                    : new Value { StringValue = rowValue });

            var rowObject = new Table.Types.Row();
            rowObject.Values.Add(protoValues);
            return rowObject;
        });

        var dateFieldList = dateFields
            .Split(',')
            .Select(field => new FieldId { Name = field });

        // Construct + execute the request
        var dateShiftConfig = new DateShiftConfig
        {
            LowerBoundDays = lowerBoundDays,
            UpperBoundDays = upperBoundDays
        };

        // Attach the context field and crypto key only when the caller supplied them.
        // Without this guard the "none set" case (accepted by the validation above) would
        // hand a null/empty wrappedKey to ByteString.FromBase64 and send an empty context field.
        if (allFieldsSet)
        {
            dateShiftConfig.Context = new FieldId { Name = contextField };
            dateShiftConfig.CryptoKey = new CryptoKey
            {
                KmsWrapped = new KmsWrappedCryptoKey
                {
                    WrappedKey = ByteString.FromBase64(wrappedKey),
                    CryptoKeyName = keyName
                }
            };
        }

        var deidConfig = new DeidentifyConfig
        {
            RecordTransformations = new RecordTransformations
            {
                FieldTransformations =
                {
                    new FieldTransformation
                    {
                        PrimitiveTransformation = new PrimitiveTransformation
                        {
                            DateShiftConfig = dateShiftConfig
                        },
                        Fields = { dateFieldList }
                    }
                }
            }
        };

        var response = dlp.DeidentifyContent(
            new DeidentifyContentRequest
            {
                Parent = new LocationName(projectId, "global").ToString(),
                DeidentifyConfig = deidConfig,
                Item = new ContentItem
                {
                    Table = new Table
                    {
                        Headers = { protoHeaders },
                        Rows = { protoRows }
                    }
                }
            });

        return response;
    }
}
Go
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// deidentifyDateShift asks the DLP API to date-shift every DATE finding in
// input, using lowerBoundDays and upperBoundDays as the shift bounds, and
// writes the de-identified text to w.
func deidentifyDateShift(w io.Writer, projectID string, lowerBoundDays, upperBoundDays int32, input string) error {
	// Example arguments:
	// projectID := "my-project-id"
	// lowerBoundDays := -1
	// upperBoundDays := -1
	// input := "2016-01-10"
	// Will print "2016-01-09"
	ctx := context.Background()

	client, err := dlp.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("dlp.NewClient: %w", err)
	}
	defer client.Close()

	// The primitive transformation: shift dates within the given bounds.
	dateShift := &dlppb.PrimitiveTransformation{
		Transformation: &dlppb.PrimitiveTransformation_DateShiftConfig{
			DateShiftConfig: &dlppb.DateShiftConfig{
				LowerBoundDays: lowerBoundDays,
				UpperBoundDays: upperBoundDays,
			},
		},
	}

	// Apply the date shift to every info type found by the inspection step.
	deidentifyConfig := &dlppb.DeidentifyConfig{
		Transformation: &dlppb.DeidentifyConfig_InfoTypeTransformations{
			InfoTypeTransformations: &dlppb.InfoTypeTransformations{
				Transformations: []*dlppb.InfoTypeTransformations_InfoTypeTransformation{
					{
						InfoTypes:               []*dlppb.InfoType{}, // Match all info types.
						PrimitiveTransformation: dateShift,
					},
				},
			},
		},
	}

	// The InspectConfig is used to identify the DATE fields.
	inspectConfig := &dlppb.InspectConfig{
		InfoTypes: []*dlppb.InfoType{
			{Name: "DATE"},
		},
	}

	// The item to analyze.
	item := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Value{
			Value: input,
		},
	}

	// Assemble and send the request.
	req := &dlppb.DeidentifyContentRequest{
		Parent:           fmt.Sprintf("projects/%s/locations/global", projectID),
		DeidentifyConfig: deidentifyConfig,
		InspectConfig:    inspectConfig,
		Item:             item,
	}
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return fmt.Errorf("DeidentifyContent: %w", err)
	}

	// Print the result.
	fmt.Fprint(w, resp.GetItem().GetValue())
	return nil
}
Java
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.common.base.Splitter;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DateShiftConfig;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.FieldTransformation;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.RecordTransformations;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Value;
import com.google.type.Date;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Sample that de-identifies a CSV file by date-shifting its date columns with the DLP API.
 *
 * <p>The CSV layout is fixed: {@code Name,BirthDate,CreditCardNumber,RegisterDate}, with dates
 * written as {@code MM/dd/yyyy}.
 */
public class DeIdentifyWithDateShift {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Path inputCsvFile = Paths.get("path/to/your/input/file.csv");
    Path outputCsvFile = Paths.get("path/to/your/output/file.csv");
    deIdentifyWithDateShift(projectId, inputCsvFile, outputCsvFile);
  }

  /**
   * Reads {@code inputCsvFile}, date-shifts columns 1 and 3 through the DLP API and writes the
   * resulting table to {@code outputCsvFile}.
   *
   * @param projectId project used to build the request parent (location "global")
   * @param inputCsvFile CSV file to read; the first line is the header row
   * @param outputCsvFile CSV file to write the de-identified table to
   * @throws IOException if a file cannot be read or written, or the input file is empty
   */
  public static void deIdentifyWithDateShift(
      String projectId, Path inputCsvFile, Path outputCsvFile) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Read the contents of the CSV file into a Table
      List<FieldId> headers;
      List<Table.Row> rows;
      try (BufferedReader input = Files.newBufferedReader(inputCsvFile)) {
        // readLine() returns null for an empty file; fail with a clear message instead of an NPE.
        String headerLine = input.readLine();
        if (headerLine == null) {
          throw new IOException("Input CSV file is empty: " + inputCsvFile);
        }
        // Parse and convert the first line into header names
        headers =
            Arrays.stream(headerLine.split(","))
                .map(header -> FieldId.newBuilder().setName(header).build())
                .collect(Collectors.toList());
        // Parse the remainder of the file as Table.Rows
        rows =
            input.lines().map(DeIdentifyWithDateShift::parseLineAsRow).collect(Collectors.toList());
      }
      Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build();
      ContentItem item = ContentItem.newBuilder().setTable(table).build();

      // Set the lower and upper bound (in days) of the date shift.
      // NOTE(review): both bounds are 5 here, so the allowed range is a single value - confirm
      // this is intended rather than e.g. a negative lower bound.
      DateShiftConfig dateShiftConfig =
          DateShiftConfig.newBuilder().setLowerBoundDays(5).setUpperBoundDays(5).build();
      PrimitiveTransformation transformation =
          PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build();

      // Specify which fields the DateShift should apply to
      List<FieldId> dateFields = Arrays.asList(headers.get(1), headers.get(3));
      FieldTransformation fieldTransformation =
          FieldTransformation.newBuilder()
              .addAllFields(dateFields)
              .setPrimitiveTransformation(transformation)
              .build();
      RecordTransformations recordTransformations =
          RecordTransformations.newBuilder().addFieldTransformations(fieldTransformation).build();

      // Specify the config for the de-identify request
      DeidentifyConfig deidentifyConfig =
          DeidentifyConfig.newBuilder().setRecordTransformations(recordTransformations).build();

      // Combine configurations into a request for the service.
      DeidentifyContentRequest request =
          DeidentifyContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setDeidentifyConfig(deidentifyConfig)
              .build();

      // Send the request and receive response from the service
      DeidentifyContentResponse response = dlp.deidentifyContent(request);

      // Write the results to the target CSV file
      try (BufferedWriter writer = Files.newBufferedWriter(outputCsvFile)) {
        Table outTable = response.getItem().getTable();
        String headerOut =
            outTable.getHeadersList().stream()
                .map(FieldId::getName)
                .collect(Collectors.joining(","));
        writer.write(headerOut + "\n");

        List<String> rowOutput =
            outTable.getRowsList().stream()
                .map(row -> joinRow(row.getValuesList()))
                .collect(Collectors.toList());
        for (String line : rowOutput) {
          writer.write(line + "\n");
        }
        System.out.println("Content written to file: " + outputCsvFile.toString());
      }
    }
  }

  /** Converts a {@code MM/dd/yyyy} string from the CSV file into a com.google.type.Date. */
  public static Date parseAsDate(String s) {
    LocalDate date = LocalDate.parse(s, DateTimeFormatter.ofPattern("MM/dd/yyyy"));
    return Date.newBuilder()
        .setDay(date.getDayOfMonth())
        .setMonth(date.getMonthValue())
        .setYear(date.getYear())
        .build();
  }

  /** Parses one CSV line in the format: Name,BirthDate,CreditCardNumber,RegisterDate. */
  public static Table.Row parseLineAsRow(String line) {
    List<String> values = Splitter.on(",").splitToList(line);
    Value name = Value.newBuilder().setStringValue(values.get(0)).build();
    Value birthDate = Value.newBuilder().setDateValue(parseAsDate(values.get(1))).build();
    Value creditCardNumber = Value.newBuilder().setStringValue(values.get(2)).build();
    Value registerDate = Value.newBuilder().setDateValue(parseAsDate(values.get(3))).build();
    return Table.Row.newBuilder()
        .addValues(name)
        .addValues(birthDate)
        .addValues(creditCardNumber)
        .addValues(registerDate)
        .build();
  }

  /**
   * Formats a date as zero-padded {@code MM/dd/yyyy}, the same pattern {@link #parseAsDate}
   * reads, so the output file can be parsed again by this class.
   */
  public static String formatDate(Date d) {
    return String.format("%02d/%02d/%04d", d.getMonth(), d.getDay(), d.getYear());
  }

  /** Joins the four values of a row back into one CSV line, formatting columns 1 and 3 as dates. */
  public static String joinRow(List<Value> values) {
    String name = values.get(0).getStringValue();
    String birthDate = formatDate(values.get(1).getDateValue());
    String creditCardNumber = values.get(2).getStringValue();
    String registerDate = formatDate(values.get(3).getDateValue());
    return String.join(",", name, birthDate, creditCardNumber, registerDate);
  }
}
Node.js
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');
// Instantiates a client
const dlp = new DLP.DlpServiceClient();
// Import other required libraries
const fs = require('fs');
// The project ID to run the API call under
// const projectId = 'my-project';
// The path to the CSV file to deidentify
// The first row of the file must specify column names, and all other rows
// must contain valid values
// const inputCsvFile = '/path/to/input/file.csv';
// The path to save the date-shifted CSV file to
// const outputCsvFile = '/path/to/output/file.csv';
// The list of (date) fields in the CSV file to date shift
// const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }];
// The maximum number of days to shift a date backward
// const lowerBoundDays = 1;
// The maximum number of days to shift a date forward
// const upperBoundDays = 1;
// (Optional) The column to determine date shift amount based on
// If this is not specified, a random shift amount will be used for every row
// If this is specified, then 'wrappedKey' and 'keyName' must also be set
// const contextFieldId = 'user_id';
// (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
// If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set
// const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';
// (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
// This key should be encrypted using the Cloud KMS key specified above
// If this is specified, then 'keyName' and 'contextFieldId' must also be set
// const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY'
// Helper function for converting CSV rows to Protobuf types.
// Takes one CSV row (a comma-separated string) and returns {values: [...]}:
// cells that Date.parse() accepts become {dateValue: {year, month, day}},
// every other cell becomes {stringValue}.
const rowToProto = row => {
  // Date-only ISO strings (YYYY-MM-DD) are interpreted as UTC by Date.parse();
  // the other accepted formats are interpreted as local time.
  const isoDateOnly = /^\d{4}-\d{2}-\d{2}$/;

  const convertedValues = row.split(',').map(value => {
    const timestamp = Date.parse(value);

    // Compare against NaN explicitly: a plain truthiness check would also
    // reject timestamp 0 (e.g. "1970-01-01"), which is a valid date.
    if (Number.isNaN(timestamp)) {
      // Convert all non-date values to strings
      return {stringValue: value.toString()};
    }

    const date = new Date(timestamp);

    // Read the components in the same time zone the string was parsed in, so
    // the calendar day does not drift by one on machines that are not on UTC.
    if (isoDateOnly.test(value)) {
      return {
        dateValue: {
          year: date.getUTCFullYear(),
          month: date.getUTCMonth() + 1,
          day: date.getUTCDate(),
        },
      };
    }
    return {
      dateValue: {
        year: date.getFullYear(),
        month: date.getMonth() + 1,
        day: date.getDate(),
      },
    };
  });

  return {values: convertedValues};
};
/**
 * Reads the CSV file at `inputCsvFile`, sends it to the DLP API as a table with
 * a date-shift transformation on `dateFields`, and writes the returned rows to
 * `outputCsvFile`.
 *
 * Uses these variables from the enclosing scope: projectId, inputCsvFile,
 * outputCsvFile, dateFields, lowerBoundDays, upperBoundDays, contextFieldId,
 * keyName, wrappedKey, plus the `dlp` client, `fs` and `rowToProto`.
 *
 * @throws {Error} If only some of contextFieldId, keyName and wrappedKey are set.
 */
async function deidentifyWithDateShift() {
  // Read and parse a CSV file. Split on both LF and CRLF so that no stray
  // '\r' ends up in the last header name or the last cell of each row.
  const csvLines = fs
    .readFileSync(inputCsvFile)
    .toString()
    .split(/\r?\n/)
    .filter(line => line.includes(','));
  const csvHeaders = csvLines[0].split(',');
  const csvRows = csvLines.slice(1);

  // Construct the table object
  const tableItem = {
    table: {
      headers: csvHeaders.map(header => {
        return {name: header};
      }),
      rows: csvRows.map(row => rowToProto(row)),
    },
  };

  // Construct DateShiftConfig
  const dateShiftConfig = {
    lowerBoundDays: lowerBoundDays,
    upperBoundDays: upperBoundDays,
  };

  // The context field and the wrapped key are an all-or-nothing group.
  if (contextFieldId && keyName && wrappedKey) {
    dateShiftConfig.context = {name: contextFieldId};
    dateShiftConfig.cryptoKey = {
      kmsWrapped: {
        wrappedKey: wrappedKey,
        cryptoKeyName: keyName,
      },
    };
  } else if (contextFieldId || keyName || wrappedKey) {
    throw new Error(
      'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!'
    );
  }

  // Construct deidentification request
  const request = {
    parent: `projects/${projectId}/locations/global`,
    deidentifyConfig: {
      recordTransformations: {
        fieldTransformations: [
          {
            fields: dateFields,
            primitiveTransformation: {
              dateShiftConfig: dateShiftConfig,
            },
          },
        ],
      },
    },
    item: tableItem,
  };

  // Run deidentification request
  const [response] = await dlp.deidentifyContent(request);
  const tableRows = response.item.table.rows;

  // Write results to a CSV file
  tableRows.forEach((row, rowIndex) => {
    const rowValues = row.values.map(value => {
      // Decide by the presence of dateValue rather than the truthiness of
      // stringValue: an empty string cell is falsy and would otherwise fall
      // through to dereferencing a dateValue that is not set.
      if (value.dateValue) {
        const {month, day, year} = value.dateValue;
        return `${month}/${day}/${year}`;
      }
      return value.stringValue;
    });
    // Row 0 of csvLines is the header line, so data rows start at index 1.
    csvLines[rowIndex + 1] = rowValues.join(',');
  });
  // Trailing empty entry so the written file ends with a newline.
  csvLines.push('');
  fs.writeFileSync(outputCsvFile, csvLines.join('\n'));

  // Print status
  console.log(`Successfully saved date-shift output to ${outputCsvFile}`);
}
deidentifyWithDateShift();
PHP
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
use DateTime;
use Exception;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CryptoKey;
use Google\Cloud\Dlp\V2\DateShiftConfig;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\FieldId;
use Google\Cloud\Dlp\V2\FieldTransformation;
use Google\Cloud\Dlp\V2\KmsWrappedCryptoKey;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
use Google\Cloud\Dlp\V2\RecordTransformations;
use Google\Cloud\Dlp\V2\Table;
use Google\Cloud\Dlp\V2\Table\Row;
use Google\Cloud\Dlp\V2\Value;
use Google\Type\Date;
/**
 * Deidentify dates in a CSV file by pseudorandomly shifting them.
 * If contextFieldName is not specified, a random shift amount will be used for every row.
 * If contextFieldName is specified, then 'wrappedKey' and 'keyName' must also be set.
 *
 * @param string $callingProjectId  The GCP Project ID to run the API call under
 * @param string $inputCsvFile      The path to the CSV file to deidentify
 * @param string $outputCsvFile     The path to save the date-shifted CSV file to
 * @param string $dateFieldNames    The comma-separated list of (date) fields in the CSV file to date shift
 * @param int    $lowerBoundDays    The maximum number of days to shift a date backward
 * @param int    $upperBoundDays    The maximum number of days to shift a date forward
 * @param string $contextFieldName  (Optional) The column to determine date shift amount based on
 * @param string $keyName           (Optional) The name of the Cloud KMS key used to encrypt (wrap) the AES-256 key
 * @param string $wrappedKey        (Optional) The base64-encoded encrypted ('wrapped') AES-256 key to use when shifting dates
 *
 * @throws Exception If the input file cannot be read or is empty, the output file cannot be
 *                   opened, or only some of $contextFieldName, $keyName and $wrappedKey are set.
 */
function deidentify_dates(
    string $callingProjectId,
    string $inputCsvFile,
    string $outputCsvFile,
    string $dateFieldNames,
    int $lowerBoundDays,
    int $upperBoundDays,
    string $contextFieldName = '',
    string $keyName = '',
    string $wrappedKey = ''
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Read a CSV file. Blank lines are skipped so that a trailing newline does not
    // turn into a table row with a single empty cell.
    $csvLines = file($inputCsvFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($csvLines === false || count($csvLines) === 0) {
        throw new Exception(sprintf('Could not read any CSV lines from %s', $inputCsvFile));
    }
    $csvHeaders = explode(',', $csvLines[0]);
    $csvRows = array_slice($csvLines, 1);

    // Convert CSV file into protobuf objects
    $tableHeaders = array_map(function ($csvHeader) {
        return (new FieldId)->setName($csvHeader);
    }, $csvHeaders);

    $tableRows = array_map(function ($csvRow) {
        $rowValues = array_map(function ($csvValue) {
            // Cells in m/d/Y format are sent as dates; everything else as strings.
            if ($csvDate = DateTime::createFromFormat('m/d/Y', $csvValue)) {
                $date = (new Date())
                    ->setYear((int) $csvDate->format('Y'))
                    ->setMonth((int) $csvDate->format('m'))
                    ->setDay((int) $csvDate->format('d'));
                return (new Value())
                    ->setDateValue($date);
            } else {
                return (new Value())
                    ->setStringValue($csvValue);
            }
        }, explode(',', $csvRow));

        return (new Row())
            ->setValues($rowValues);
    }, $csvRows);

    // Convert date fields into protobuf objects
    $dateFields = array_map(function ($dateFieldName) {
        return (new FieldId())->setName($dateFieldName);
    }, explode(',', $dateFieldNames));

    // Construct the table object
    $table = (new Table())
        ->setHeaders($tableHeaders)
        ->setRows($tableRows);

    $item = (new ContentItem())
        ->setTable($table);

    // Construct dateShiftConfig
    $dateShiftConfig = (new DateShiftConfig())
        ->setLowerBoundDays($lowerBoundDays)
        ->setUpperBoundDays($upperBoundDays);

    // The context field and the wrapped key are an all-or-nothing group.
    if ($contextFieldName && $keyName && $wrappedKey) {
        $contextField = (new FieldId())
            ->setName($contextFieldName);

        // Create the wrapped crypto key configuration object
        $kmsWrappedCryptoKey = (new KmsWrappedCryptoKey())
            ->setWrappedKey(base64_decode($wrappedKey))
            ->setCryptoKeyName($keyName);

        $cryptoKey = (new CryptoKey())
            ->setKmsWrapped($kmsWrappedCryptoKey);

        $dateShiftConfig
            ->setContext($contextField)
            ->setCryptoKey($cryptoKey);
    } elseif ($contextFieldName || $keyName || $wrappedKey) {
        throw new Exception('You must set either ALL or NONE of {$contextFieldName, $keyName, $wrappedKey}!');
    }

    // Create the information transform configuration objects
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setDateShiftConfig($dateShiftConfig);

    $fieldTransformation = (new FieldTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setFields($dateFields);

    $recordTransformations = (new RecordTransformations())
        ->setFieldTransformations([$fieldTransformation]);

    // Create the deidentification configuration object
    $deidentifyConfig = (new DeidentifyConfig())
        ->setRecordTransformations($recordTransformations);

    $parent = "projects/$callingProjectId/locations/global";

    // Run request
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($item);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Check for errors
    foreach ($response->getOverview()->getTransformationSummaries() as $summary) {
        foreach ($summary->getResults() as $result) {
            if ($details = $result->getDetails()) {
                printf('Error: %s' . PHP_EOL, $details);
                return;
            }
        }
    }

    // Save the results to a file
    $csvRef = fopen($outputCsvFile, 'w');
    if ($csvRef === false) {
        throw new Exception(sprintf('Could not open %s for writing', $outputCsvFile));
    }
    fputcsv($csvRef, $csvHeaders);
    foreach ($response->getItem()->getTable()->getRows() as $tableRow) {
        $values = array_map(function ($tableValue) {
            // Decide by the presence of a date value, not by the truthiness of the string
            // value: string cells such as '' or '0' are falsy and would otherwise be
            // treated as dates.
            $protoDate = $tableValue->getDateValue();
            if ($protoDate === null) {
                return $tableValue->getStringValue();
            }
            // Write m/d/Y, the same format the input is parsed with. This keeps the
            // four-digit year and avoids strftime(), deprecated since PHP 8.1.
            return sprintf(
                '%02d/%02d/%04d',
                $protoDate->getMonth(),
                $protoDate->getDay(),
                $protoDate->getYear()
            );
        }, iterator_to_array($tableRow->getValues()));
        fputcsv($csvRef, $values);
    }
    fclose($csvRef);
    printf('Deidentified dates written to %s' . PHP_EOL, $outputCsvFile);
}
Python
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import base64
import csv
from datetime import datetime
from typing import List
import google.cloud.dlp
from google.cloud.dlp_v2 import types
def deidentify_with_date_shift(
    project: str,
    input_csv_file: str = None,
    output_csv_file: str = None,
    date_fields: List[str] = None,
    lower_bound_days: int = None,
    upper_bound_days: int = None,
    context_field_id: str = None,
    wrapped_key: str = None,
    key_name: str = None,
) -> None:
    """Uses the Data Loss Prevention API to deidentify dates in a CSV file by
    pseudorandomly shifting them.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_csv_file: The path to the CSV file to deidentify. The first row
            of the file must specify column names, and all other rows must
            contain valid values.
        output_csv_file: The path to save the date-shifted CSV file.
        date_fields: The list of (date) fields in the CSV file to date shift.
            Example: ['birth_date', 'register_date']
        lower_bound_days: The maximum number of days to shift a date backward
        upper_bound_days: The maximum number of days to shift a date forward
        context_field_id: (Optional) The name of the column to determine the
            date shift amount based on. If this is not specified, a random
            shift amount will be used for every row. If this is specified,
            then 'wrapped_key' and 'key_name' must also be set. Example:
            context_field_id = 'user_id'
        wrapped_key: (Optional) The base64-encoded encrypted ('wrapped')
            AES-256 key to use. This key should be encrypted using the Cloud
            KMS key specified by key_name.
        key_name: (Optional) The name of the Cloud KMS key used to encrypt
            ('wrap') the AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'

    Returns:
        None; the de-identified table is written to output_csv_file and a
        status line is printed to the terminal.

    Raises:
        ValueError: If the input CSV file has no rows, or if only some of
            context_field_id, key_name and wrapped_key are set.
    """

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Convert date field list to Protobuf type. Build a real list (not a
    # one-shot map iterator) so the request dict can be read more than once.
    def map_fields(field: str) -> dict:
        return {"name": field}

    if date_fields:
        date_fields = [map_fields(field) for field in date_fields]
    else:
        date_fields = []

    # newline="" is what the csv module expects for files it reads or writes.
    with open(input_csv_file, newline="") as csvfile:
        f = list(csv.reader(csvfile))

    if not f:
        raise ValueError(f"Input CSV file {input_csv_file} contains no rows.")

    # Helper functions for converting CSV rows to Protobuf types
    def map_headers(header: str) -> dict:
        return {"name": header}

    def map_data(value: str) -> dict:
        # Cells in m/d/Y format are sent as dates; everything else as strings.
        try:
            date = datetime.strptime(value, "%m/%d/%Y")
            return {
                "date_value": {"year": date.year, "month": date.month, "day": date.day}
            }
        except ValueError:
            return {"string_value": value}

    def map_rows(row: List[str]) -> dict:
        return {"values": [map_data(value) for value in row]}

    # Using the helper functions, convert CSV rows to protobuf-compatible
    # dictionaries.
    csv_headers = [map_headers(header) for header in f[0]]
    csv_rows = [map_rows(row) for row in f[1:]]

    # Construct the table dict
    table_item = {"table": {"headers": csv_headers, "rows": csv_rows}}

    # Construct date shift config
    date_shift_config = {
        "lower_bound_days": lower_bound_days,
        "upper_bound_days": upper_bound_days,
    }

    # If using a Cloud KMS key, add it to the date_shift_config.
    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    if context_field_id and key_name and wrapped_key:
        date_shift_config["context"] = {"name": context_field_id}
        date_shift_config["crypto_key"] = {
            "kms_wrapped": {
                "wrapped_key": base64.b64decode(wrapped_key),
                "crypto_key_name": key_name,
            }
        }
    elif context_field_id or key_name or wrapped_key:
        raise ValueError(
            """You must set either ALL or NONE of
        [context_field_id, key_name, wrapped_key]!"""
        )

    # Construct Deidentify Config
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "fields": date_fields,
                    "primitive_transformation": {
                        "date_shift_config": date_shift_config
                    },
                }
            ]
        }
    }

    # Write to CSV helper methods
    def write_header(header: types.storage.FieldId) -> str:
        return header.name

    def write_data(data: types.storage.Value) -> str:
        # Decide by field presence rather than the truthiness of string_value:
        # an empty string cell is falsy and would otherwise be written out as
        # the unset date "0/0/0".
        if "date_value" in data:
            return "{}/{}/{}".format(
                data.date_value.month,
                data.date_value.day,
                data.date_value.year,
            )
        return data.string_value

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": table_item,
        }
    )

    # Write results to CSV file
    with open(output_csv_file, "w", newline="") as csvfile:
        write_file = csv.writer(csvfile, delimiter=",")
        write_file.writerow(map(write_header, response.item.table.headers))
        for row in response.item.table.rows:
            write_file.writerow(map(write_data, row.values))

    # Print status
    print(f"Successfully saved date-shift output to {output_csv_file}")
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。