此示例使用自定义正则表达式并借助热词规则提高匹配的可能性。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
C#
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
using System;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using static Google.Cloud.Dlp.V2.CustomInfoType.Types;
public class InspectDataWithHotwordRule
{
public static InspectContentResponse InspectDataHotwordRule(
string projectId,
string text,
string customRegex,
string hotwordRegex,
InfoType infoType = null)
{
// Instantiate dlp client.
var dlp = DlpServiceClient.Create();
// Construct the content item.
var contentItem = new ContentItem
{
ByteItem = new ByteContentItem
{
Type = ByteContentItem.Types.BytesType.TextUtf8,
Data = Google.Protobuf.ByteString.CopyFromUtf8(text)
}
};
// Construct the info type if null.
var infotype = infoType ?? new InfoType { Name = "C_MRN" };
// Construct the custom regex detector.
var customInfoType = new CustomInfoType
{
InfoType = infotype,
Regex = new Regex { Pattern = customRegex },
Likelihood = Likelihood.Possible
};
// Construct hotword rule.
var hotwordRule = new DetectionRule.Types.HotwordRule
{
HotwordRegex = new Regex { Pattern = hotwordRegex },
LikelihoodAdjustment = new DetectionRule.Types.LikelihoodAdjustment
{
FixedLikelihood = Likelihood.VeryLikely
},
Proximity = new DetectionRule.Types.Proximity
{
WindowBefore = 10
}
};
// Construct the rule set for the inspect config.
var inspectionRuleSet = new InspectionRuleSet
{
InfoTypes = { infotype },
Rules =
{
new InspectionRule
{
HotwordRule = hotwordRule
}
}
};
// Construct the inspect config.
var inspectConfig = new InspectConfig
{
CustomInfoTypes = { customInfoType },
IncludeQuote = true,
RuleSet = { inspectionRuleSet },
};
// Construct the request.
var request = new InspectContentRequest
{
ParentAsLocationName = new LocationName(projectId, "global"),
Item = contentItem,
InspectConfig = inspectConfig
};
// Call the API.
var response = dlp.InspectContent(request);
// Inspect the response.
Console.WriteLine($"Findings: {response.Result.Findings.Count}");
foreach (var f in response.Result.Findings)
{
Console.WriteLine("Quote: " + f.Quote);
Console.WriteLine("Info type: " + f.InfoType.Name);
Console.WriteLine("Likelihood: " + f.Likelihood);
}
return response;
}
}
Go
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// inspectWithHotWordRules inspects data with hot word rule, it uses custom
// regex with a hot word rule to increase the likelihood match
func inspectWithHotWordRules(w io.Writer, projectID, textToInspect string) error {
// projectID := "my-project-id"
// textToInspect := "Patient's MRN 444-5-22222 and just a number 333-2-33333"
ctx := context.Background()
// Initialize a client once and reuse it to send multiple requests. Clients
// are safe to use across goroutines. When the client is no longer needed,
// call the Close method to cleanup its resources.
client, err := dlp.NewClient(ctx)
if err != nil {
return err
}
// Closing the client safely cleans up background resources.
defer client.Close()
// Specify the type and content to be inspected.
contentItem := &dlppb.ContentItem{
DataItem: &dlppb.ContentItem_ByteItem{
ByteItem: &dlppb.ByteContentItem{
Type: dlppb.ByteContentItem_TEXT_UTF8,
Data: []byte(textToInspect),
},
},
}
// Construct the custom regex detectors
customInfoType := &dlppb.CustomInfoType{
InfoType: &dlppb.InfoType{
Name: "C_MRN",
},
Type: &dlppb.CustomInfoType_Regex_{
Regex: &dlppb.CustomInfoType_Regex{
Pattern: "[1-9]{3}-[1-9]{1}-[1-9]{5}",
},
},
Likelihood: dlppb.Likelihood_POSSIBLE,
}
// Construct hotword rule.
hotWordRule := &dlppb.CustomInfoType_DetectionRule_HotwordRule{
HotwordRegex: &dlppb.CustomInfoType_Regex{
Pattern: "(?i)(mrn|medical)(?-i)",
},
Proximity: &dlppb.CustomInfoType_DetectionRule_Proximity{
WindowBefore: int32(10),
},
LikelihoodAdjustment: &dlppb.CustomInfoType_DetectionRule_LikelihoodAdjustment{
Adjustment: &dlppb.CustomInfoType_DetectionRule_LikelihoodAdjustment_FixedLikelihood{
FixedLikelihood: dlppb.Likelihood_VERY_LIKELY,
},
},
}
inspectionRuleSet := &dlppb.InspectionRuleSet{
Rules: []*dlppb.InspectionRule{
{
Type: &dlppb.InspectionRule_HotwordRule{
HotwordRule: hotWordRule,
},
},
},
InfoTypes: []*dlppb.InfoType{
customInfoType.InfoType,
},
}
// Construct the Inspect request to be sent by the client.
req := &dlppb.InspectContentRequest{
Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
Item: contentItem,
InspectConfig: &dlppb.InspectConfig{
CustomInfoTypes: []*dlppb.CustomInfoType{
customInfoType,
},
RuleSet: []*dlppb.InspectionRuleSet{
inspectionRuleSet,
},
IncludeQuote: true,
},
}
// Send the request.
resp, err := client.InspectContent(ctx, req)
if err != nil {
return err
}
// Parse the response and process results
fmt.Fprintf(w, "Findings: %v\n", len(resp.Result.Findings))
for _, v := range resp.GetResult().Findings {
fmt.Fprintf(w, "Quote: %v\n", v.GetQuote())
fmt.Fprintf(w, "InfoType Name: %v\n", v.GetInfoType().GetName())
fmt.Fprintf(w, "Likelihood: %v\n", v.GetLikelihood())
}
return nil
}
Java
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.CustomInfoType.DetectionRule.HotwordRule;
import com.google.privacy.dlp.v2.CustomInfoType.DetectionRule.LikelihoodAdjustment;
import com.google.privacy.dlp.v2.CustomInfoType.DetectionRule.Proximity;
import com.google.privacy.dlp.v2.CustomInfoType.Regex;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.InspectionRule;
import com.google.privacy.dlp.v2.InspectionRuleSet;
import com.google.privacy.dlp.v2.Likelihood;
import com.google.privacy.dlp.v2.LocationName;
import com.google.protobuf.ByteString;
import java.io.IOException;
public class InspectWithHotwordRules {
public static void main(String[] args) throws Exception {
// TODO(developer): Replace these variables before running the sample.
String projectId = "your-project-id";
String textToInspect = "Patient's MRN 444-5-22222 and just a number 333-2-33333";
String customRegexPattern = "[1-9]{3}-[1-9]{1}-[1-9]{5}";
String hotwordRegexPattern = "(?i)(mrn|medical)(?-i)";
inspectWithHotwordRules(projectId, textToInspect, customRegexPattern, hotwordRegexPattern);
}
// Inspects a BigQuery Table
public static void inspectWithHotwordRules(
String projectId, String textToInspect, String customRegexPattern, String hotwordRegexPattern)
throws IOException {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (DlpServiceClient dlp = DlpServiceClient.create()) {
// Specify the type and content to be inspected.
ByteContentItem byteItem =
ByteContentItem.newBuilder()
.setType(BytesType.TEXT_UTF8)
.setData(ByteString.copyFromUtf8(textToInspect))
.build();
ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();
// Specify the regex pattern the inspection will look for.
Regex regex = Regex.newBuilder().setPattern(customRegexPattern).build();
// Construct the custom regex detector.
InfoType infoType = InfoType.newBuilder().setName("C_MRN").build();
CustomInfoType customInfoType =
CustomInfoType.newBuilder().setInfoType(infoType).setRegex(regex).build();
// Specify hotword likelihood adjustment.
LikelihoodAdjustment likelihoodAdjustment =
LikelihoodAdjustment.newBuilder().setFixedLikelihood(Likelihood.VERY_LIKELY).build();
// Specify a window around a finding to apply a detection rule.
Proximity proximity = Proximity.newBuilder().setWindowBefore(10).build();
// Construct hotword rule.
HotwordRule hotwordRule =
HotwordRule.newBuilder()
.setHotwordRegex(Regex.newBuilder().setPattern(hotwordRegexPattern).build())
.setLikelihoodAdjustment(likelihoodAdjustment)
.setProximity(proximity)
.build();
// Construct rule set for the inspect config.
InspectionRuleSet inspectionRuleSet =
InspectionRuleSet.newBuilder()
.addInfoTypes(infoType)
.addRules(InspectionRule.newBuilder().setHotwordRule(hotwordRule))
.build();
// Construct the configuration for the Inspect request.
InspectConfig config =
InspectConfig.newBuilder()
.addCustomInfoTypes(customInfoType)
.setIncludeQuote(true)
.setMinLikelihood(Likelihood.POSSIBLE)
.addRuleSet(inspectionRuleSet)
.build();
// Construct the Inspect request to be sent by the client.
InspectContentRequest request =
InspectContentRequest.newBuilder()
.setParent(LocationName.of(projectId, "global").toString())
.setItem(item)
.setInspectConfig(config)
.build();
// Use the client to send the API request.
InspectContentResponse response = dlp.inspectContent(request);
// Parse the response and process results
System.out.println("Findings: " + response.getResult().getFindingsCount());
for (Finding f : response.getResult().getFindingsList()) {
System.out.println("\tQuote: " + f.getQuote());
System.out.println("\tInfo type: " + f.getInfoType().getName());
System.out.println("\tLikelihood: " + f.getLikelihood());
}
}
}
}
Node.js
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');
// Instantiates a client
const dlp = new DLP.DlpServiceClient();
// The project ID to run the API call under
// const projectId = 'my-project';
// The string to inspect
// const string = 'Patients MRN 444-5-22222';
// The minimum likelihood required before returning a match
// const minLikelihood = DLP.protos.google.privacy.dlp.v2.Likelihood.POSSIBLE;
// The maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;
// The infoTypes of information to match
// See https://cloud.google.com/dlp/docs/concepts-infotypes for more information
// about supported infoTypes.
// const infoTypes = [{ name: 'EMAIL_ADDRESS' }];
// The customInfoTypes of information to match
// const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
// { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}];
// Whether to include the matching string
// const includeQuote = true;
// Custom hotword regex patten
// const hotwordRegexPattern = '(?i)(mrn|medical)(?-i)';
async function inspectWithHotwordRule() {
// Construct item to inspect
const item = {
byteItem: {
type: DLP.protos.google.privacy.dlp.v2.ByteContentItem.BytesType
.TEXT_UTF8,
data: Buffer.from(string, 'utf-8'),
},
};
// Construct a hot word rule
const hotwordRule = {
hotwordRegex: {
pattern: hotwordRegexPattern,
},
proximity: {
windowBefore: 10,
},
likelihoodAdjustment: {
fixedLikelihood:
DLP.protos.google.privacy.dlp.v2.Likelihood.VERY_LIKELY,
},
};
// Construct a hotword inspection rule
const inpectionRuleSet = [
{
infoTypes: customInfoTypes.map(
customInfoType => customInfoType.infoType
),
rules: [{hotwordRule: hotwordRule}],
},
];
// Assigns likelihood to each match
customInfoTypes = customInfoTypes.map(customInfoType => {
customInfoType.likelihood =
DLP.protos.google.privacy.dlp.v2.Likelihood.POSSIBLE;
return customInfoType;
});
// Construct request
const request = {
parent: `projects/${projectId}/locations/global`,
inspectConfig: {
infoTypes: infoTypes,
customInfoTypes: customInfoTypes,
minLikelihood: minLikelihood,
includeQuote: includeQuote,
limits: {
maxFindingsPerRequest: maxFindings,
},
ruleSet: inpectionRuleSet,
},
item: item,
};
// Run request
const [response] = await dlp.inspectContent(request);
const findings = response.result.findings;
if (findings.length > 0) {
console.log('Findings:');
findings.forEach(finding => {
if (includeQuote) {
console.log(`\tQuote: ${finding.quote}`);
}
console.log(`\tInfo type: ${finding.infoType.name}`);
console.log(`\tLikelihood: ${finding.likelihood}`);
});
} else {
console.log('No findings.');
}
}
inspectWithHotwordRule();
PHP
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CustomInfoType;
use Google\Cloud\Dlp\V2\CustomInfoType\DetectionRule\HotwordRule;
use Google\Cloud\Dlp\V2\CustomInfoType\DetectionRule\LikelihoodAdjustment;
use Google\Cloud\Dlp\V2\CustomInfoType\DetectionRule\Proximity;
use Google\Cloud\Dlp\V2\CustomInfoType\Regex;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectContentRequest;
use Google\Cloud\Dlp\V2\InspectionRule;
use Google\Cloud\Dlp\V2\InspectionRuleSet;
use Google\Cloud\Dlp\V2\Likelihood;
/**
* Inspect data with a hotword rule
* This sample uses a custom regex with a hotword rule to increase the likelihood of match.
*
* @param string $projectId The Google Cloud project id to use as a parent resource.
* @param string $textToInspect The string to inspect.
*/
function inspect_hotword_rule(
// TODO(developer): Replace sample parameters before running the code.
string $projectId,
string $textToInspect = "Patient's MRN 444-5-22222 and just a number 333-2-33333"
): void {
// Instantiate a client.
$dlp = new DlpServiceClient();
$parent = "projects/$projectId/locations/global";
// Specify what content you want the service to Inspect.
$item = (new ContentItem())
->setValue($textToInspect);
// Specify the regex pattern the inspection will look for.
$customRegexPattern = '[1-9]{3}-[1-9]{1}-[1-9]{5}';
$hotwordRegexPattern = '(?i)(mrn|medical)(?-i)';
// Construct the custom regex detector.
$cMrnDetector = (new InfoType())
->setName('C_MRN');
$customInfoType = (new CustomInfoType())
->setInfoType($cMrnDetector)
->setLikelihood(Likelihood::POSSIBLE)
->setRegex((new Regex())
->setPattern($customRegexPattern));
// Specify hotword likelihood adjustment.
$likelihoodAdjustment = (new LikelihoodAdjustment())
->setFixedLikelihood(Likelihood::VERY_LIKELY);
// Specify a window around a finding to apply a detection rule.
$proximity = (new Proximity())
->setWindowBefore(10);
$hotwordRule = (new HotwordRule())
->setHotwordRegex((new Regex())
->setPattern($hotwordRegexPattern))
->setLikelihoodAdjustment($likelihoodAdjustment)
->setProximity($proximity);
// Construct rule set for the inspect config.
$inspectionRuleSet = (new InspectionRuleSet())
->setInfoTypes([$cMrnDetector])
->setRules([
(new InspectionRule())
->setHotwordRule($hotwordRule)
]);
// Construct the configuration for the Inspect request.
$inspectConfig = (new InspectConfig())
->setCustomInfoTypes([$customInfoType])
->setIncludeQuote(true)
->setRuleSet([$inspectionRuleSet]);
// Run request
$inspectContentRequest = (new InspectContentRequest())
->setParent($parent)
->setInspectConfig($inspectConfig)
->setItem($item);
$response = $dlp->inspectContent($inspectContentRequest);
// Print the results
$findings = $response->getResult()->getFindings();
if (count($findings) == 0) {
printf('No findings.' . PHP_EOL);
} else {
printf('Findings:' . PHP_EOL);
foreach ($findings as $finding) {
printf(' Quote: %s' . PHP_EOL, $finding->getQuote());
printf(' Info type: %s' . PHP_EOL, $finding->getInfoType()->getName());
printf(' Likelihood: %s' . PHP_EOL, Likelihood::name($finding->getLikelihood()));
}
}
}
Python
如需了解如何安装和使用敏感数据保护客户端库,请参阅 敏感数据保护客户端库。
如需向 Sensitive Data Protection 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为本地开发环境设置身份验证。
import google.cloud.dlp
def inspect_data_w_custom_hotwords(
project: str,
content_string: str,
) -> None:
"""Uses the Data Loss Prevention API to analyze string with medical record
number custom regex detector, with custom hotwords rules to boost finding
certainty under some circumstances.
Args:
project: The Google Cloud project id to use as a parent resource.
content_string: The string to inspect.
Returns:
None; the response from the API is printed to the terminal.
"""
# Instantiate a client.
dlp = google.cloud.dlp_v2.DlpServiceClient()
# Construct a custom regex detector info type called "C_MRN",
# with ###-#-##### pattern, where each # represents a digit from 1 to 9.
# The detector has a detection likelihood of POSSIBLE.
custom_info_types = [
{
"info_type": {"name": "C_MRN"},
"regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
"likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
}
]
# Construct a rule set with hotwords "mrn" and "medical", with a likelohood
# boost to VERY_LIKELY when hotwords are present within the 10 character-
# window preceding the PII finding.
hotword_rule = {
"hotword_regex": {"pattern": "(?i)(mrn|medical)(?-i)"},
"likelihood_adjustment": {
"fixed_likelihood": google.cloud.dlp_v2.Likelihood.VERY_LIKELY
},
"proximity": {"window_before": 10},
}
rule_set = [
{"info_types": [{"name": "C_MRN"}], "rules": [{"hotword_rule": hotword_rule}]}
]
# Construct the configuration dictionary with the custom regex info type.
inspect_config = {
"custom_info_types": custom_info_types,
"rule_set": rule_set,
"include_quote": True,
}
# Construct the `item`.
item = {"value": content_string}
# Convert the project id into a full resource id.
parent = f"projects/{project}"
# Call the API.
response = dlp.inspect_content(
request={"parent": parent, "inspect_config": inspect_config, "item": item}
)
# Print out the results.
if response.result.findings:
for finding in response.result.findings:
print(f"Quote: {finding.quote}")
print(f"Info type: {finding.info_type.name}")
print(f"Likelihood: {finding.likelihood}")
else:
print("No findings.")
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。