Cloud Data Loss Prevention (Cloud DLP) is now a part of Sensitive Data Protection. The API name remains the same: Cloud Data Loss Prevention API (DLP API). For information about the services that make up Sensitive Data Protection, see
Sensitive Data Protection overview.
Redact an image
Stay organized with collections
Save and categorize content based on your preferences.
Demonstrates redacting sensitive data from an image.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
Python
To learn how to install and use the client library for Sensitive Data Protection, see
Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials.
For more information, see
Set up authentication for a local development environment.
import mimetypes
from typing import List
from typing import Optional

import google.cloud.dlp
# Numeric values of the ByteContentItem.BytesType enum, keyed by MIME type.
# https://github.com/googleapis/googleapis/blob/master/google/privacy/dlp/v2/dlp.proto / message ByteContentItem
_BYTES_TYPE_UNSPECIFIED = 0  # BYTES_TYPE_UNSPECIFIED
_BYTES_TYPE_IMAGE = 6  # IMAGE - any image type
_BYTES_TYPE_BY_MIME_TYPE = {
    "image/jpeg": 1,  # IMAGE_JPEG
    "image/bmp": 2,  # IMAGE_BMP
    "image/png": 3,  # IMAGE_PNG
    "image/svg+xml": 4,  # IMAGE_SVG (the registered MIME type for .svg)
    "image/svg": 4,  # IMAGE_SVG (legacy spelling, kept for existing callers)
    "text/plain": 5,  # TEXT_UTF8
    "image": _BYTES_TYPE_IMAGE,  # explicit generic "image" passed by a caller
    "application/msword": 7,  # WORD_DOCUMENT
    "application/pdf": 8,  # PDF
    "application/vnd.ms-powerpoint": 9,  # POWERPOINT_DOCUMENT
    "application/powerpoint": 9,  # POWERPOINT_DOCUMENT (legacy spelling)
    "application/vnd.ms-excel": 10,  # EXCEL_DOCUMENT
    "application/msexcel": 10,  # EXCEL_DOCUMENT (legacy spelling)
    "application/avro": 11,  # AVRO
    "text/csv": 12,  # CSV
    "text/tab-separated-values": 13,  # TSV (the registered MIME type)
    "text/tsv": 13,  # TSV (legacy spelling)
}


def _content_type_index(mime_type: Optional[str]) -> int:
    """Maps a MIME type string to a ByteContentItem.BytesType enum value.

    Args:
        mime_type: A MIME type such as 'image/png', or None.

    Returns:
        The matching enum value; 6 (IMAGE) for any other 'image/...' type;
        0 (BYTES_TYPE_UNSPECIFIED) for None or an unrecognized type.
    """
    if mime_type is None:
        return _BYTES_TYPE_UNSPECIFIED
    normalized = mime_type.strip().lower()
    if normalized in _BYTES_TYPE_BY_MIME_TYPE:
        return _BYTES_TYPE_BY_MIME_TYPE[normalized]
    # Image subtypes without a dedicated enum value fall back to the generic
    # IMAGE type rather than to "unspecified".
    if normalized.startswith("image/"):
        return _BYTES_TYPE_IMAGE
    return _BYTES_TYPE_UNSPECIFIED


def redact_image(
    project: str,
    filename: str,
    output_filename: str,
    info_types: List[str],
    min_likelihood: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> None:
    """Uses the Data Loss Prevention API to redact protected data in an image.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the file to inspect.
        output_filename: The path to which the redacted image will be written.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
            None is treated as an empty list.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        mime_type: The MIME type of the file. If not specified, the type is
            inferred via the Python standard library's mimetypes module.

    Returns:
        None; the redacted image is written to output_filename and a summary
        line is printed to the terminal.
    """
    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the list of strings into a list of dictionaries (protos are
    # also accepted). A separate name is used so the caller-facing parameter
    # is never rebound to a different shape.
    info_type_dicts = [{"name": name} for name in (info_types or [])]

    # One redaction config per info type. Each dictionary contains an
    # info_type and optionally the color used for the replacement. The color
    # is omitted in this sample, so the default (black) will be used.
    image_redaction_configs = [
        {"info_type": info_type} for info_type in info_type_dicts
    ]

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        "min_likelihood": min_likelihood,
        "info_types": info_type_dicts,
    }

    # If mime_type is not specified, guess it from the filename.
    if mime_type is None:
        guessed_type, _ = mimetypes.MimeTypes().guess_type(filename)
        mime_type = guessed_type or "application/octet-stream"

    # Construct the byte_item, containing the file's byte data.
    with open(filename, mode="rb") as f:
        byte_item = {"type_": _content_type_index(mime_type), "data": f.read()}

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.redact_image(
        request={
            "parent": parent,
            "inspect_config": inspect_config,
            "image_redaction_configs": image_redaction_configs,
            "byte_item": byte_item,
        }
    )

    # Write out the results.
    with open(output_filename, mode="wb") as f:
        f.write(response.redacted_image)
    print(f"Wrote {len(response.redacted_image)} to {output_filename}")
Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.
[[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Hard to understand","hardToUnderstand","thumb-down"],["Incorrect information or sample code","incorrectInformationOrSampleCode","thumb-down"],["Missing the information/samples I need","missingTheInformationSamplesINeed","thumb-down"],["Other","otherDown","thumb-down"]],[],[],[]]