敏感数据保护可以检测结构化内容(如 CSV)中的敏感数据并对其进行分类。通过以表形式进行检查或去标识化,结构和列可为敏感数据保护提供额外的线索,使其能够针对某些使用场景提供更好的结果。
检查表
以下代码示例演示了如何检查数据表中是否存在敏感内容。表中的值支持多种类型。
C#
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
using System;
using System.Collections.Generic;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
/// <summary>
/// Sample that sends a table to the DLP <c>InspectContent</c> API and prints the findings.
/// </summary>
public class InspectTable
{
    /// <summary>
    /// Inspects a table for sensitive data and writes every finding to the console.
    /// </summary>
    /// <param name="projectId">Project used to build the "global" location parent of the request.</param>
    /// <param name="tableToInspect">Table to inspect; when null, a built-in two-row name/phone table is used.</param>
    /// <param name="infoTypes">Info types to look for; when null, only PHONE_NUMBER is requested.</param>
    /// <returns>The response returned by the <c>InspectContent</c> call.</returns>
    public static InspectContentResponse InspectTableData(
        string projectId,
        Table tableToInspect = null,
        IEnumerable<InfoType> infoTypes = null)
    {
        // Create the client that sends the request.
        DlpServiceClient client = DlpServiceClient.Create();

        // Fall back to the sample table / default detector when the caller passed null.
        Table table = tableToInspect ?? BuildSampleTable();
        IEnumerable<InfoType> requestedInfoTypes =
            infoTypes ?? new[] { new InfoType { Name = "PHONE_NUMBER" } };

        // IncludeQuote asks the service to echo the matched text back in each finding.
        var config = new InspectConfig { IncludeQuote = true };
        config.InfoTypes.Add(requestedInfoTypes);

        var inspectRequest = new InspectContentRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            InspectConfig = config,
            Item = new ContentItem { Table = table },
        };

        // Call the API.
        InspectContentResponse inspectResponse = client.InspectContent(inspectRequest);

        // Report the findings.
        var findings = inspectResponse.Result.Findings;
        Console.WriteLine($"Findings: {findings.Count}");
        foreach (var finding in findings)
        {
            Console.WriteLine($"Quote: {finding.Quote}");
            Console.WriteLine($"Info type: {finding.InfoType.Name}");
            Console.WriteLine($"Likelihood: {finding.Likelihood}");
        }

        return inspectResponse;
    }

    /// <summary>
    /// Builds the default table: "Name" and "Phone" headers with two sample rows.
    /// </summary>
    private static Table BuildSampleTable()
    {
        var firstRow = new Table.Types.Row
        {
            Values =
            {
                new Value { StringValue = "John Doe" },
                new Value { StringValue = "(206) 555-0123" }
            }
        };
        var secondRow = new Table.Types.Row
        {
            Values =
            {
                new Value { StringValue = "Mark Twain" },
                new Value { StringValue = "(450) 555-0123" }
            }
        };

        var sample = new Table();
        sample.Headers.Add(new FieldId { Name = "Name" });
        sample.Headers.Add(new FieldId { Name = "Phone" });
        sample.Rows.Add(firstRow);
        sample.Rows.Add(secondRow);
        return sample;
    }
}
Go
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// inspectTable inspects a small hard-coded table (headers "name" and "phone",
// one row) for PHONE_NUMBER findings and writes the results to w.
//
// projectID is used to build the request parent
// "projects/<projectID>/locations/global". Errors from creating the client or
// from the InspectContent call are returned unchanged.
func inspectTable(w io.Writer, projectID string) error {
	// projectID := "your-project-id"

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Create a default table.
	tableToInspect := &dlppb.Table{
		Headers: []*dlppb.FieldId{
			{Name: "name"},
			{Name: "phone"},
		},
		Rows: []*dlppb.Table_Row{
			{
				Values: []*dlppb.Value{
					{
						Type: &dlppb.Value_StringValue{
							StringValue: "John Doe",
						},
					},
					{
						Type: &dlppb.Value_StringValue{
							StringValue: "(206) 555-0123",
						},
					},
				},
			},
		},
	}

	// Specify the table to be inspected.
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Table{
			Table: tableToInspect,
		},
	}

	// Specify the type of info the inspection will look for.
	// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
	infoTypes := []*dlppb.InfoType{
		{Name: "PHONE_NUMBER"},
	}

	// Construct the Inspect request to be sent by the client.
	req := &dlppb.InspectContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		Item:   contentItem,
		InspectConfig: &dlppb.InspectConfig{
			InfoTypes:    infoTypes,
			IncludeQuote: true,
		},
	}

	// Send the request.
	resp, err := client.InspectContent(ctx, req)
	if err != nil {
		return err
	}

	// Read the findings once through the generated getters so that a response
	// without a Result yields an empty list instead of a nil dereference.
	findings := resp.GetResult().GetFindings()

	// Print the results.
	fmt.Fprintf(w, "Findings: %v\n", len(findings))
	for _, f := range findings {
		fmt.Fprintf(w, "Quote: %v\n", f.GetQuote())
		fmt.Fprintf(w, "Infotype Name: %v\n", f.GetInfoType().GetName())
		fmt.Fprintf(w, "Likelihood: %v\n", f.GetLikelihood())
	}
	return nil
}
Java
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
/** Sample that inspects a table for sensitive content and prints the findings. */
public class InspectTable {

  /**
   * Builds a one-row sample table (headers "name" and "phone") and inspects it.
   *
   * @param args unused
   */
  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    Table tableToInspect =
        Table.newBuilder()
            .addHeaders(FieldId.newBuilder().setName("name").build())
            .addHeaders(FieldId.newBuilder().setName("phone").build())
            .addRows(
                Row.newBuilder()
                    .addValues(Value.newBuilder().setStringValue("John Doe").build())
                    .addValues(Value.newBuilder().setStringValue("(206) 555-0123").build()))
            .build();

    inspectTable(projectId, tableToInspect);
  }

  /**
   * Inspects the provided table for PHONE_NUMBER findings and prints each one to standard output.
   *
   * <p>Any exception raised while creating the client or sending the request is caught and
   * reported on standard output; it is not propagated to the caller.
   *
   * @param projectId project used to build the "global" location parent of the request
   * @param tableToInspect table whose cells are inspected
   */
  public static void inspectTable(String projectId, Table tableToInspect) {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the table to be inspected.
      ContentItem item = ContentItem.newBuilder().setTable(tableToInspect).build();

      // Specify the type of info the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      InfoType infoType = InfoType.newBuilder().setName("PHONE_NUMBER").build();

      // Construct the configuration for the Inspect request.
      InspectConfig config =
          InspectConfig.newBuilder().addInfoTypes(infoType).setIncludeQuote(true).build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request =
          InspectContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setInspectConfig(config)
              .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    } catch (Exception e) {
      // Name the method that actually failed (the message previously said "inspectString").
      System.out.println("Error during inspectTable: \n" + e.toString());
    }
  }
}
Node.js
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under.
// TODO(developer): uncomment and set this before running. `projectId` is read
// when the request is built inside inspectTable(); while it is undefined the
// call rejects with a ReferenceError.
// const projectId = 'my-project';

// The infoTypes of information to match
const infoTypes = [{name: 'PHONE_NUMBER'}];

// Table data: two headers and a single row of string values
const tableData = {
  headers: [{name: 'name'}, {name: 'phone'}],
  rows: [
    {
      values: [{stringValue: 'John Doe'}, {stringValue: '(206) 555-0123'}],
    },
  ],
};

/**
 * Sends `tableData` to the DLP inspectContent API and logs each finding
 * (info type, quote and likelihood), or 'No findings.' when there are none.
 *
 * @returns {Promise<void>} Settles once the results are logged; rejects if
 *     building or sending the request throws.
 */
async function inspectTable() {
  // Specify the table to be inspected.
  const item = {
    table: tableData,
  };

  // Construct the configuration for the Inspect request.
  const inspectConfig = {
    infoTypes: infoTypes,
    includeQuote: true,
  };

  // Construct the Inspect request to be sent by the client.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    inspectConfig: inspectConfig,
    item: item,
  };

  // Use the client to send the API request.
  const [response] = await dlp.inspectContent(request);

  // Print findings.
  const findings = response.result.findings;
  if (findings.length > 0) {
    console.log(`Findings: ${findings.length}\n`);
    findings.forEach(finding => {
      console.log(`InfoType: ${finding.infoType.name}`);
      console.log(`\tQuote: ${finding.quote}`);
      console.log(`\tLikelihood: ${finding.likelihood} \n`);
    });
  } else {
    console.log('No findings.');
  }
}

// Do not leave the returned promise floating: report the failure and mark the
// process as failed instead of relying on the runtime's unhandled-rejection
// behaviour.
inspectTable().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
PHP
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\FieldId;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectContentRequest;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\Table;
use Google\Cloud\Dlp\V2\Table\Row;
use Google\Cloud\Dlp\V2\Value;
/**
 * Inspect a small sample table (NAME / PHONE, one row) for phone numbers
 * and print every finding.
 *
 * @param string $projectId The Google Cloud project id to use as a parent resource.
 */
function inspect_table(string $projectId): void
{
    // Create the client and the parent resource name for the request.
    $dlp = new DlpServiceClient();
    $parent = "projects/$projectId/locations/global";

    // Build the table piece by piece: headers first, then the single data row.
    $nameHeader = (new FieldId())->setName('NAME');
    $phoneHeader = (new FieldId())->setName('PHONE');

    $nameCell = (new Value())->setStringValue('John Doe');
    $phoneCell = (new Value())->setStringValue('(206) 555-0123');
    $sampleRow = (new Row())->setValues([$nameCell, $phoneCell]);

    $tableToInspect = (new Table())
        ->setHeaders([$nameHeader, $phoneHeader])
        ->setRows([$sampleRow]);

    $contentItem = (new ContentItem())->setTable($tableToInspect);

    // Look only for phone numbers and ask for the matched text to be quoted back.
    $phoneNumberType = (new InfoType())->setName('PHONE_NUMBER');
    $config = (new InspectConfig())
        ->setInfoTypes([$phoneNumberType])
        ->setIncludeQuote(true);

    // Assemble and send the request.
    $request = (new InspectContentRequest())
        ->setParent($parent)
        ->setInspectConfig($config)
        ->setItem($contentItem);
    $response = $dlp->inspectContent($request);

    // Print the results.
    $findings = $response->getResult()->getFindings();
    if (count($findings) == 0) {
        printf('No findings.' . PHP_EOL);
        return;
    }

    printf('Findings:' . PHP_EOL);
    foreach ($findings as $finding) {
        $infoTypeName = $finding->getInfoType()->getName();
        $likelihoodName = Likelihood::name($finding->getLikelihood());
        printf(' Quote: %s' . PHP_EOL, $finding->getQuote());
        printf(' Info type: %s' . PHP_EOL, $infoTypeName);
        printf(' Likelihood: %s' . PHP_EOL, $likelihoodName);
    }
}
Python
如需了解如何安装和使用用于敏感数据保护的客户端库,请参阅敏感数据保护客户端库。
如需向敏感数据保护服务进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为本地开发环境设置身份验证。
import json
from typing import List, Optional, Union
import google.cloud.dlp
def inspect_table(
    project: str,
    data: Union[str, dict],
    info_types: List[str],
    custom_dictionaries: Optional[List[str]] = None,
    custom_regexes: Optional[List[str]] = None,
    min_likelihood: Optional[str] = None,
    max_findings: Optional[int] = None,
    include_quote: bool = True,
) -> None:
    """Uses the Data Loss Prevention API to analyze a table for protected data.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        data: The table to inspect, either as a JSON string or as the
            already-parsed dictionary. It must have a "header" key (list of
            column names) and a "rows" key (list of rows, each a list of
            cell values).
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        custom_dictionaries: Optional list of comma-separated word lists;
            entry ``i`` becomes the custom info type ``CUSTOM_DICTIONARY_i``.
        custom_regexes: Optional list of regex patterns; entry ``i`` becomes
            the custom info type ``CUSTOM_REGEX_i``.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        include_quote: Boolean for whether to display a quote of the detected
            information in the results.

    Returns:
        None; the response from the API is printed to the terminal.

    Example:
        data = {
            "header":[
                "email",
                "phone number"
            ],
            "rows":[
                [
                    "robertfrost@xyz.com",
                    "4232342345"
                ],
                [
                    "johndoe@pqr.com",
                    "4253458383"
                ]
            ]
        }

        >> $ python inspect_content.py table \
        '{"header": ["email", "phone number"],
        "rows": [["robertfrost@xyz.com", "4232342345"],
        ["johndoe@pqr.com", "4253458383"]]}'
        >>  Quote: robertfrost@xyz.com
            Info type: EMAIL_ADDRESS
            Likelihood: 4
            Quote: johndoe@pqr.com
            Info type: EMAIL_ADDRESS
            Likelihood: 4
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # The table is indexed by key below, so a JSON string has to be decoded
    # first; an already-parsed dictionary is used as-is.
    table_data = json.loads(data) if isinstance(data, str) else data

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    info_types = [{"name": info_type} for info_type in info_types]

    # Prepare custom_info_types by parsing the dictionary word lists and
    # regex patterns.
    dictionaries = [
        {
            "info_type": {"name": f"CUSTOM_DICTIONARY_{i}"},
            "dictionary": {"word_list": {"words": custom_dict.split(",")}},
        }
        for i, custom_dict in enumerate(custom_dictionaries or [])
    ]
    regexes = [
        {
            "info_type": {"name": f"CUSTOM_REGEX_{i}"},
            "regex": {"pattern": custom_regex},
        }
        for i, custom_regex in enumerate(custom_regexes or [])
    ]
    custom_info_types = dictionaries + regexes

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        "info_types": info_types,
        "custom_info_types": custom_info_types,
        "min_likelihood": min_likelihood,
        "include_quote": include_quote,
        "limits": {"max_findings_per_request": max_findings},
    }

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    # Every cell is sent as a `string_value`.
    headers = [{"name": val} for val in table_data["header"]]
    rows = [
        {"values": [{"string_value": cell_val} for cell_val in row]}
        for row in table_data["rows"]
    ]
    item = {"table": {"headers": headers, "rows": rows}}

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.inspect_content(
        request={"parent": parent, "inspect_config": inspect_config, "item": item}
    )

    # Print out the results.
    if response.result.findings:
        for finding in response.result.findings:
            # The quote is printed only when present and non-empty.
            try:
                if finding.quote:
                    print(f"Quote: {finding.quote}")
            except AttributeError:
                pass
            print(f"Info type: {finding.info_type.name}")
            print(f"Likelihood: {finding.likelihood}")
    else:
        print("No findings.")
REST
要详细了解如何将 DLP API 与 JSON 结合使用,请参阅 JSON 快速入门。
JSON 输入:
POST https://dlp.googleapis.com/v2/projects/[PROJECT_ID]/content:inspect?key={YOUR_API_KEY}
{
"item":{
"table":{
"headers": [{"name":"name"}, {"name":"phone"}],
"rows": [{
"values":[
{"string_value": "John Doe"},
{"string_value": "(206) 555-0123"}
]}
]
}
},
"inspectConfig":{
"infoTypes":[
{
"name":"PHONE_NUMBER"
}
],
"includeQuote":true
}
}
JSON 输出:
{
"result": {
"findings": [
{
"quote": "(206) 555-0123",
"infoType": {
"name": "PHONE_NUMBER"
},
"likelihood": "VERY_LIKELY",
"location": {
"byteRange": {
"end": "14"
},
"codepointRange": {
"end": "14"
},
"contentLocations": [
{
"recordLocation": {
"fieldId": {
"name": "phone"
},
"tableLocation": {
}
}
}
]
},
"createTime": "2019-03-08T23:55:10.980Z"
}
]
}
}
文本与结构化文本
对文本进行结构化有助于提供上下文。如果将前述示例中的同一内容以纯字符串形式(即仅采用“John Doe, (206) 555-0123”形式)进行检查,得到的结果准确性会降低。这是因为敏感数据保护在判断该数字的用途时,可依据的上下文线索较少。如果可能,请考虑将字符串解析为表对象,以获得最准确的扫描结果。