import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
public class InspectTable {
public static void main(String[] args) throws Exception {
// TODO(developer): Replace these variables before running the sample.
String projectId = "your-project-id";
Table tableToInspect =
Table.newBuilder()
.addHeaders(FieldId.newBuilder().setName("name").build())
.addHeaders(FieldId.newBuilder().setName("phone").build())
.addRows(
Row.newBuilder()
.addValues(Value.newBuilder().setStringValue("John Doe").build())
.addValues(Value.newBuilder().setStringValue("(206) 555-0123").build()))
.build();
inspectTable(projectId, tableToInspect);
}
// Inspects the provided text.
public static void inspectTable(String projectId, Table tableToInspect) {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (DlpServiceClient dlp = DlpServiceClient.create()) {
// Specify the table to be inspected.
ContentItem item = ContentItem.newBuilder().setTable(tableToInspect).build();
// Specify the type of info the inspection will look for.
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
InfoType infoType = InfoType.newBuilder().setName("PHONE_NUMBER").build();
// Construct the configuration for the Inspect request.
InspectConfig config =
InspectConfig.newBuilder().addInfoTypes(infoType).setIncludeQuote(true).build();
// Construct the Inspect request to be sent by the client.
InspectContentRequest request =
InspectContentRequest.newBuilder()
.setParent(LocationName.of(projectId, "global").toString())
.setItem(item)
.setInspectConfig(config)
.build();
// Use the client to send the API request.
InspectContentResponse response = dlp.inspectContent(request);
// Parse the response and process results
System.out.println("Findings: " + response.getResult().getFindingsCount());
for (Finding f : response.getResult().getFindingsList()) {
System.out.println("\tQuote: " + f.getQuote());
System.out.println("\tInfo type: " + f.getInfoType().getName());
System.out.println("\tLikelihood: " + f.getLikelihood());
}
} catch (Exception e) {
System.out.println("Error during inspectString: \n" + e.toString());
}
}
}
from typing import List, Optional # noqa: E402, I100
import google.cloud.dlp # noqa: F811, E402
def inspect_table(
project: str,
data: str,
info_types: List[str],
custom_dictionaries: List[str] = None,
custom_regexes: List[str] = None,
min_likelihood: Optional[str] = None,
max_findings: Optional[int] = None,
include_quote: bool = True,
) -> None:
"""Uses the Data Loss Prevention API to analyze strings for protected data.
Args:
project: The Google Cloud project id to use as a parent resource.
data: Json string representing table data.
info_types: A list of strings representing info types to look for.
A full list of info type categories can be fetched from the API.
min_likelihood: A string representing the minimum likelihood threshold
that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
max_findings: The maximum number of findings to report; 0 = no maximum.
include_quote: Boolean for whether to display a quote of the detected
information in the results.
Returns:
None; the response from the API is printed to the terminal.
Example:
data = {
"header":[
"email",
"phone number"
],
"rows":[
[
"robertfrost@xyz.com",
"4232342345"
],
[
"johndoe@pqr.com",
"4253458383"
]
]
}
>> $ python inspect_content.py table \
'{"header": ["email", "phone number"],
"rows": [["robertfrost@xyz.com", "4232342345"],
["johndoe@pqr.com", "4253458383"]]}'
>> Quote: robertfrost@xyz.com
Info type: EMAIL_ADDRESS
Likelihood: 4
Quote: johndoe@pqr.com
Info type: EMAIL_ADDRESS
Likelihood: 4
"""
# Instantiate a client.
dlp = google.cloud.dlp_v2.DlpServiceClient()
# Prepare info_types by converting the list of strings into a list of
# dictionaries (protos are also accepted).
info_types = [{"name": info_type} for info_type in info_types]
# Prepare custom_info_types by parsing the dictionary word lists and
# regex patterns.
if custom_dictionaries is None:
custom_dictionaries = []
dictionaries = [
{
"info_type": {"name": f"CUSTOM_DICTIONARY_{i}"},
"dictionary": {"word_list": {"words": custom_dict.split(",")}},
}
for i, custom_dict in enumerate(custom_dictionaries)
]
if custom_regexes is None:
custom_regexes = []
regexes = [
{
"info_type": {"name": f"CUSTOM_REGEX_{i}"},
"regex": {"pattern": custom_regex},
}
for i, custom_regex in enumerate(custom_regexes)
]
custom_info_types = dictionaries + regexes
# Construct the configuration dictionary. Keys which are None may
# optionally be omitted entirely.
inspect_config = {
"info_types": info_types,
"custom_info_types": custom_info_types,
"min_likelihood": min_likelihood,
"include_quote": include_quote,
"limits": {"max_findings_per_request": max_findings},
}
# Construct the `table`. For more details on the table schema, please see
# https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
headers = [{"name": val} for val in data["header"]]
rows = []
for row in data["rows"]:
rows.append({"values": [{"string_value": cell_val} for cell_val in row]})
table = {}
table["headers"] = headers
table["rows"] = rows
item = {"table": table}
# Convert the project id into a full resource id.
parent = f"projects/{project}"
# Call the API.
response = dlp.inspect_content(
request={"parent": parent, "inspect_config": inspect_config, "item": item}
)
# Print out the results.
if response.result.findings:
for finding in response.result.findings:
try:
if finding.quote:
print(f"Quote: {finding.quote}")
except AttributeError:
pass
print(f"Info type: {finding.info_type.name}")
print(f"Likelihood: {finding.likelihood}")
else:
print("No findings.")