Go
To learn how to install and use the client library for Sensitive Data Protection, see
Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials.
For more information, see
Set up authentication for a local development environment.
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// deIdentifyTableBucketing de-identifies the "HAPPINESS SCORE" column of a
// small sample table by applying a fixed-size bucketing transformation to it,
// then writes the table returned by the service to w.
//
// projectID is the Google Cloud project used as the parent resource of the
// request, for example "your-project-id". A non-nil error is returned when the
// DLP client cannot be created or when the DeidentifyContent call fails.
func deIdentifyTableBucketing(w io.Writer, projectID string) error {
	// Sample rows. Every cell is sent as a string value, in the same order as
	// the headers declared below (AGE, PATIENT, HAPPINESS SCORE).
	row1 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "22"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Jane Austen"}},
			{Type: &dlppb.Value_StringValue{StringValue: "21"}},
		},
	}
	row2 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "55"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Mark Twain"}},
			{Type: &dlppb.Value_StringValue{StringValue: "75"}},
		},
	}
	row3 := &dlppb.Table_Row{
		Values: []*dlppb.Value{
			{Type: &dlppb.Value_StringValue{StringValue: "101"}},
			{Type: &dlppb.Value_StringValue{StringValue: "Charles Dickens"}},
			{Type: &dlppb.Value_StringValue{StringValue: "95"}},
		},
	}

	table := &dlppb.Table{
		Headers: []*dlppb.FieldId{
			{Name: "AGE"},
			{Name: "PATIENT"},
			{Name: "HAPPINESS SCORE"},
		},
		Rows: []*dlppb.Table_Row{row1, row2, row3},
	}

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("creating DLP client: %w", err)
	}
	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify what content you want the service to de-identify.
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Table{
			Table: table,
		},
	}

	// Specify how the content should be de-identified: buckets of size 10
	// with a lower bound of 0 and an upper bound of 100.
	fixedSizeBucketingConfig := &dlppb.FixedSizeBucketingConfig{
		BucketSize: 10,
		LowerBound: &dlppb.Value{
			Type: &dlppb.Value_IntegerValue{
				IntegerValue: 0,
			},
		},
		UpperBound: &dlppb.Value{
			Type: &dlppb.Value_IntegerValue{
				IntegerValue: 100,
			},
		},
	}
	primitiveTransformation := &dlppb.PrimitiveTransformation_FixedSizeBucketingConfig{
		FixedSizeBucketingConfig: fixedSizeBucketingConfig,
	}

	// Specify the field whose values are to be bucketed.
	fieldID := &dlppb.FieldId{
		Name: "HAPPINESS SCORE",
	}

	// Associate the bucketing transformation with the specified field.
	fieldTransformation := &dlppb.FieldTransformation{
		Transformation: &dlppb.FieldTransformation_PrimitiveTransformation{
			PrimitiveTransformation: &dlppb.PrimitiveTransformation{
				Transformation: primitiveTransformation,
			},
		},
		Fields: []*dlppb.FieldId{
			fieldID,
		},
	}

	recordTransformations := &dlppb.RecordTransformations{
		FieldTransformations: []*dlppb.FieldTransformation{
			fieldTransformation,
		},
	}

	// Construct the de-identification request to be sent by the client.
	req := &dlppb.DeidentifyContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		DeidentifyConfig: &dlppb.DeidentifyConfig{
			Transformation: &dlppb.DeidentifyConfig_RecordTransformations{
				RecordTransformations: recordTransformations,
			},
		},
		Item: contentItem,
	}

	// Send the request.
	resp, err := client.DeidentifyContent(ctx, req)
	if err != nil {
		return fmt.Errorf("de-identifying content: %w", err)
	}

	// Print the results.
	fmt.Fprintf(w, "Table after de-identification : %v", resp.GetItem().GetTable())
	return nil
}
Python
To learn how to install and use the client library for Sensitive Data Protection, see
Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials.
For more information, see
Set up authentication for a local development environment.
from typing import Dict, List, Union
import google.cloud.dlp
from google.cloud.dlp_v2 import types
def deidentify_table_bucketing(
    project: str,
    table_data: Dict[str, Union[List[str], List[List[str]]]],
    deid_content_list: List[str],
    bucket_size: int,
    bucketing_lower_bound: int,
    bucketing_upper_bound: int,
) -> types.dlp.Table:
    """De-identify table columns with the Data Loss Prevention API by
    replacing their values with fixed-size bucket ranges.

    Args:
        project: The Google Cloud project id used as the parent resource.
        table_data: Table contents as a dictionary with a "header" entry
            (list of column names) and a "rows" entry (list of rows, each a
            list of string cell values).
        deid_content_list: Names of the table fields to de-identify.
        bucket_size: Size of each bucket, except for the minimum and maximum
            buckets. For example, with ``bucketing_lower_bound`` = 10,
            ``bucketing_upper_bound`` = 89 and ``bucket_size`` = 10, the
            buckets are: -10, 10-20, 20-30, 30-40, 40-50, 50-60, 60-70,
            70-80, 80-89, 89+.
        bucketing_lower_bound: Lower bound value of the buckets.
        bucketing_upper_bound: Upper bound value of the buckets.

    Returns:
        The de-identified table taken from the API response. The same table
        is also printed to the terminal.

    Example:
        Input table, de-identifying ["age"] with bucket size 10, lower
        bound 0 and upper bound 100:

            {"header": ["email", "phone number", "age"],
             "rows": [["robertfrost@example.com", "4232342345", "35"],
                      ["johndoe@example.com", "4253458383", "68"]]}

        Resulting table:

            {"header": ["email", "phone number", "age"],
             "rows": [["robertfrost@example.com", "4232342345", "30:40"],
                      ["johndoe@example.com", "4253458383", "60:70"]]}
    """
    # Create the DLP service client.
    client = google.cloud.dlp_v2.DlpServiceClient()

    # Build the table payload. For the table schema, see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    header_fields = [{"name": column} for column in table_data["header"]]
    table_rows = [
        {"values": [{"string_value": cell} for cell in row]}
        for row in table_data["rows"]
    ]
    content_item = {"table": {"headers": header_fields, "rows": table_rows}}

    # Fields the bucketing transformation applies to.
    target_fields = [{"name": field_name} for field_name in deid_content_list]

    # Fixed-size bucketing transformation, attached to the target fields.
    bucketing_transformation = {
        "fixed_size_bucketing_config": {
            "bucket_size": bucket_size,
            "lower_bound": {"integer_value": bucketing_lower_bound},
            "upper_bound": {"integer_value": bucketing_upper_bound},
        }
    }
    field_transformation = {
        "fields": target_fields,
        "primitive_transformation": bucketing_transformation,
    }
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [field_transformation]
        }
    }

    # Send the request, using the full resource id of the project as parent.
    response = client.deidentify_content(
        request={
            "parent": f"projects/{project}/locations/global",
            "deidentify_config": deidentify_config,
            "item": content_item,
        }
    )

    # Report the de-identified table and hand it back to the caller.
    deidentified_table = response.item.table
    print(f"Table after de-identification: {deidentified_table}")
    return deidentified_table