Redact only certain sensitive data from an image using infoTypes

Redact only certain sensitive data from an image.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using Google.Protobuf;
using System;
using System.IO;

public class RedactImageWithListedInfotypes
{
    /// <summary>
    /// Sends a PNG image to the DLP API for redaction of phone numbers and email
    /// addresses, prints the extracted text, and writes the redacted image to disk.
    /// </summary>
    /// <param name="projectId">The Google Cloud project ID used to build the request parent.</param>
    /// <param name="originalImagePath">Path of the PNG image to read and redact.</param>
    /// <param name="redactedImagePath">Path the redacted image is written to (created or overwritten).</param>
    /// <returns>The response returned by the RedactImage call.</returns>
    public static RedactImageResponse Redact(string projectId, string originalImagePath, string redactedImagePath)
    {
        // Read the image up front and release the file handle immediately.
        // FileAccess.Read is requested explicitly: FileMode.Open on its own asks for
        // read/write access, which fails for read-only files.
        ByteString imageData;
        using (var input = new FileStream(originalImagePath, FileMode.Open, FileAccess.Read))
        {
            imageData = ByteString.FromStream(input);
        }

        var request = new RedactImageRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            InspectConfig = new InspectConfig
            {
                MinLikelihood = Likelihood.Likely,
                Limits = new InspectConfig.Types.FindingLimits() { MaxFindingsPerItem = 5 },
                IncludeQuote = true,
                // Only these infoTypes are inspected for.
                InfoTypes =
                    {
                        new InfoType { Name = "PHONE_NUMBER" },
                        new InfoType { Name = "EMAIL_ADDRESS" }
                    }
            },
            ByteItem = new ByteContentItem
            {
                Type = ByteContentItem.Types.BytesType.ImagePng,
                Data = imageData
            },
        };

        var client = DlpServiceClient.Create();
        var response = client.RedactImage(request);

        Console.WriteLine($"Extracted text: {response.ExtractedText}");

        // Write the redacted image to the output file; disposing the stream flushes
        // the data and releases the file handle even if the write throws.
        using (var output = new FileStream(redactedImagePath, FileMode.Create, FileAccess.Write))
        {
            response.RedactedImage.WriteTo(output);
        }

        return response;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"
	"os"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// redactImageFileListedInfoTypes reads the JPEG image at inputPath, asks the
// DLP API to redact only the listed infoTypes, writes the redacted image to
// outputPath, and reports the output location on w.
func redactImageFileListedInfoTypes(w io.Writer, projectID, inputPath, outputPath string) error {
	// projectID := "my-project-id"
	// inputPath := "testdata/image.jpg"
	// outputPath := "testdata/test-output-image-file-listed-infoTypes-redacted.jpeg"

	ctx := context.Background()

	// Create the client once and reuse it for multiple requests; clients are
	// safe to use across goroutines. Close releases its background resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}
	defer client.Close()

	// Load the raw image bytes.
	imageData, err := os.ReadFile(inputPath)
	if err != nil {
		return err
	}

	// The infoTypes to inspect for and redact.
	// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
	targets := []*dlppb.InfoType{
		{Name: "US_SOCIAL_SECURITY_NUMBER"},
		{Name: "EMAIL_ADDRESS"},
		{Name: "PHONE_NUMBER"},
	}

	// Build one redaction config per infoType.
	redactionConfigs := make([]*dlppb.RedactImageRequest_ImageRedactionConfig, len(targets))
	for i, target := range targets {
		redactionConfigs[i] = &dlppb.RedactImageRequest_ImageRedactionConfig{
			Target: &dlppb.RedactImageRequest_ImageRedactionConfig_InfoType{InfoType: target},
		}
	}

	// Assemble the redaction request and send it.
	resp, err := client.RedactImage(ctx, &dlppb.RedactImageRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		ByteItem: &dlppb.ByteContentItem{
			Type: dlppb.ByteContentItem_IMAGE_JPEG,
			Data: imageData,
		},
		ImageRedactionConfigs: redactionConfigs,
		InspectConfig: &dlppb.InspectConfig{
			InfoTypes: targets,
		},
	})
	if err != nil {
		return err
	}

	// Persist the redacted image.
	if writeErr := os.WriteFile(outputPath, resp.GetRedactedImage(), 0644); writeErr != nil {
		return writeErr
	}
	fmt.Fprintf(w, "Wrote output to %s\n", outputPath)
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.RedactImageRequest;
import com.google.privacy.dlp.v2.RedactImageRequest.ImageRedactionConfig;
import com.google.privacy.dlp.v2.RedactImageResponse;
import com.google.protobuf.ByteString;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

class RedactImageFileListedInfoTypes {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "my-project-id";
    String inputPath = "src/test/resources/sensitive-data-image.jpeg";
    String outputPath = "sensitive-data-image-redacted.jpeg";
    redactImageFileListedInfoTypes(projectId, inputPath, outputPath);
  }

  /**
   * Redacts only the listed infoTypes from a JPEG image and writes the redacted image to a file.
   *
   * @param projectId the Google Cloud project ID used to build the request parent
   * @param inputPath path of the JPEG image to redact
   * @param outputPath path the redacted image is written to
   * @throws IOException if the client cannot be created or a file cannot be read or written
   */
  static void redactImageFileListedInfoTypes(String projectId, String inputPath, String outputPath)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the content to be redacted. The input stream is closed as soon as the bytes
      // have been read so the file handle is not leaked.
      ByteString fileBytes;
      try (FileInputStream input = new FileInputStream(inputPath)) {
        fileBytes = ByteString.readFrom(input);
      }
      ByteContentItem byteItem =
          ByteContentItem.newBuilder().setType(BytesType.IMAGE_JPEG).setData(fileBytes).build();

      // Specify the types of info necessary to redact.
      List<InfoType> infoTypes = new ArrayList<>();
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      for (String typeName :
          new String[] {"US_SOCIAL_SECURITY_NUMBER", "EMAIL_ADDRESS", "PHONE_NUMBER"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }
      InspectConfig inspectConfig = InspectConfig.newBuilder().addAllInfoTypes(infoTypes).build();

      // Prepare redaction configs: one per infoType.
      List<ImageRedactionConfig> imageRedactionConfigs =
          infoTypes.stream()
              .map(infoType -> ImageRedactionConfig.newBuilder().setInfoType(infoType).build())
              .collect(Collectors.toList());

      // Construct the Redact request to be sent by the client.
      RedactImageRequest request =
          RedactImageRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setByteItem(byteItem)
              .addAllImageRedactionConfigs(imageRedactionConfigs)
              .setInspectConfig(inspectConfig)
              .build();

      // Use the client to send the API request.
      RedactImageResponse response = dlp.redactImage(request);

      // Write the redacted image; try-with-resources closes the stream even if the write fails.
      try (FileOutputStream redacted = new FileOutputStream(outputPath)) {
        redacted.write(response.getRedactedImage().toByteArray());
      }
      System.out.println("Redacted image written to " + outputPath);
    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Imports required Node.js libraries
const mime = require('mime');
const fs = require('fs');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// The path to a local file to inspect. Can be a JPG or PNG image file.
// const filepath = 'path/to/image.png';

// The infoTypes of information to redact
// const infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }];

// The local path to save the resulting image to.
// const outputPath = 'result.png';

/**
 * Redacts the configured infoTypes from the image at `filepath` and saves the
 * redacted image to `outputPath`.
 *
 * Reads `projectId`, `filepath`, `infoTypes` and `outputPath` from the
 * enclosing scope.
 */
async function redactImageWithInfoTypes() {
  // Map the file's MIME type to the numeric byte-content type sent in the
  // request; anything unrecognized falls back to 0.
  // SVG files are reported as 'image/svg+xml', so that spelling is accepted
  // alongside the shorter 'image/svg'.
  const bytesTypeByMime = new Map([
    ['image/jpeg', 1],
    ['image/bmp', 2],
    ['image/png', 3],
    ['image/svg', 4],
    ['image/svg+xml', 4],
  ]);
  const fileTypeConstant = bytesTypeByMime.get(mime.getType(filepath)) || 0;

  // Load image; readFileSync already returns a Buffer, so encode it directly.
  const fileBytes = fs.readFileSync(filepath).toString('base64');

  // Construct image redaction request
  const request = {
    parent: `projects/${projectId}/locations/global`,
    byteItem: {
      type: fileTypeConstant,
      data: fileBytes,
    },
    inspectConfig: {
      infoTypes: infoTypes,
    },
    // One redaction config per requested infoType.
    imageRedactionConfigs: infoTypes.map(infoType => ({infoType: infoType})),
  };

  // Run image redaction request
  const [response] = await dlp.redactImage(request);
  const image = response.redactedImage;
  fs.writeFileSync(outputPath, image);
  console.log(`Saved image redaction results to path: ${outputPath}`);
}
redactImageWithInfoTypes();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

use Google\Cloud\Dlp\V2\ByteContentItem;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\RedactImageRequest;
use Google\Cloud\Dlp\V2\RedactImageRequest\ImageRedactionConfig;

/**
 * Redact only certain sensitive data from an image using infoTypes.
 *
 * Reads the image, sends it to the DLP API with one redaction config per
 * listed infoType, and writes the redacted image to $outputPath.
 *
 * @param string $callingProjectId    The project ID to run the API call under.
 * @param string $imagePath           The local filepath of the image to redact.
 * @param string $outputPath          The local filepath to save the resulting image to.
 *
 * @throws \RuntimeException If the image file cannot be read.
 */
function redact_image_listed_infotypes(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $imagePath = './test/data/test.png',
    string $outputPath = './test/data/redact_image_listed_infotypes.png'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Specify the types of info necessary to redact.
    $infoTypes = [
        (new InfoType())
            ->setName('US_SOCIAL_SECURITY_NUMBER'),
        (new InfoType())
            ->setName('EMAIL_ADDRESS'),
        (new InfoType())
            ->setName('PHONE_NUMBER'),
    ];

    // Create the configuration object.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes($infoTypes);

    // Read the whole image file. file_get_contents also copes with empty
    // files, where fread() with a zero length would raise an error.
    $imageBytes = file_get_contents($imagePath);
    if ($imageBytes === false) {
        throw new \RuntimeException(sprintf('Unable to read image file %s', $imagePath));
    }

    // Map the image's content type to the numeric byte-content type sent in
    // the request; anything unrecognized falls back to 0.
    // SVG files are typically reported as 'image/svg+xml', and some libmagic
    // versions report BMP as 'image/x-ms-bmp', so both spellings are accepted.
    $bytesTypeByMime = [
        'image/jpeg' => 1,
        'image/bmp' => 2,
        'image/x-ms-bmp' => 2,
        'image/png' => 3,
        'image/svg' => 4,
        'image/svg+xml' => 4,
    ];
    $mimeType = mime_content_type($imagePath);
    $typeConstant = is_string($mimeType) ? ($bytesTypeByMime[$mimeType] ?? 0) : 0;

    // Create the byte-storing object.
    $byteContent = (new ByteContentItem())
        ->setType($typeConstant)
        ->setData($imageBytes);

    // Create the image redaction config objects, one per infoType.
    $imageRedactionConfigs = [];
    foreach ($infoTypes as $infoType) {
        $config = (new ImageRedactionConfig())
            ->setInfoType($infoType);
        $imageRedactionConfigs[] = $config;
    }

    $parent = "projects/$callingProjectId/locations/global";

    // Run request.
    $redactImageRequest = (new RedactImageRequest())
        ->setParent($parent)
        ->setInspectConfig($inspectConfig)
        ->setByteItem($byteContent)
        ->setImageRedactionConfigs($imageRedactionConfigs);
    $response = $dlp->redactImage($redactImageRequest);

    // Save result to file.
    file_put_contents($outputPath, $response->getRedactedImage());

    // Print completion message.
    printf('Redacted image saved to %s' . PHP_EOL, $outputPath);
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import mimetypes
from typing import List, Optional

import google.cloud.dlp


def redact_image_listed_info_types(
    project: str,
    filename: str,
    output_filename: str,
    info_types: List[str],
    min_likelihood: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> None:
    """Uses the Data Loss Prevention API to redact protected data in an image.

    Only findings of the listed info types are redacted; the redacted image
    is written to ``output_filename``.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the file to inspect.
        output_filename: The path to which the redacted image will be written.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        mime_type: The MIME type of the file. If not specified, the type is
            inferred via the Python standard library's mimetypes module.
    Returns:
        None; the redacted image is written to ``output_filename`` and a
        summary line is printed to the terminal.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the list of info type names into a list of dictionaries
    # (protos are also accepted).
    info_type_dicts = [{"name": info_type} for info_type in info_types]

    # Prepare image_redaction_configs, a list of dictionaries. Each dictionary
    # contains an info_type and optionally the color used for the replacement.
    # The color is omitted in this sample, so the default (black) will be used.
    image_redaction_configs = [
        {"info_type": info_type} for info_type in info_type_dicts
    ]

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        "min_likelihood": min_likelihood,
        "info_types": info_type_dicts,
    }

    # If mime_type is not specified, guess it from the filename.
    if mime_type is None:
        mime_guess = mimetypes.MimeTypes().guess_type(filename)
        mime_type = mime_guess[0] or "application/octet-stream"

    # Select the content type index from the list of supported types.
    # https://github.com/googleapis/googleapis/blob/master/google/privacy/dlp/v2/dlp.proto / message ByteContentItem
    supported_content_types = {
        None: 0,  # "Unspecified" or BYTES_TYPE_UNSPECIFIED
        "image/jpeg": 1,  # IMAGE_JPEG
        "image/bmp": 2,  # IMAGE_BMP
        "image/png": 3,  # IMAGE_PNG
        "image/svg": 4,  # IMAGE_SVG - kept for callers passing this spelling
        "image/svg+xml": 4,  # IMAGE_SVG - the type mimetypes reports for .svg
        "text/plain": 5,  # TEXT_UTF8
        # Note: No specific MIME type for general "image", mapping to IMAGE for any image type not specified
        "image": 6,  # IMAGE - Any image type
        "application/msword": 7,  # WORD_DOCUMENT
        "application/pdf": 8,  # PDF
        "application/powerpoint": 9,  # POWERPOINT_DOCUMENT
        "application/msexcel": 10,  # EXCEL_DOCUMENT
        "application/avro": 11,  # AVRO
        "text/csv": 12,  # CSV
        "text/tsv": 13,  # TSV
    }
    # Unknown MIME types fall back to 0 (unspecified).
    content_type_index = supported_content_types.get(mime_type, 0)

    # Construct the byte_item, containing the file's byte data.
    with open(filename, mode="rb") as f:
        byte_item = {"type_": content_type_index, "data": f.read()}

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.redact_image(
        request={
            "parent": parent,
            "inspect_config": inspect_config,
            "image_redaction_configs": image_redaction_configs,
            "byte_item": byte_item,
        }
    )

    # Write out the results.
    with open(output_filename, mode="wb") as f:
        f.write(response.redacted_image)
    print(f"Wrote {len(response.redacted_image)} to {output_filename}")

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.