Inspecting images for sensitive data

The Cloud Data Loss Prevention (DLP) API can detect and classify sensitive data within images. The DLP API currently supports the following image types: IMAGE_JPEG, IMAGE_BMP, IMAGE_PNG, and IMAGE_SVG.

Given an image as input, the DLP API detects sensitive data in the image. The output of an inspection operation includes the infoTypes found, the likelihood of each match, and pixel coordinates for the location of the sensitive data found in the image. The coordinates of the top-left corner of an image are (0,0).

Inspecting an image

The following sample JSON and code in several languages demonstrate how to use the DLP API to inspect images for sensitive data.

Protocol

See the JSON quickstart for more information about using the DLP API with JSON.

Sample input:

{
  "item": 
  {
    "byteItem": 
    {
      "type": "IMAGE_JPEG",
      "data": "/9j/4AAQSkZJRgABAQAASABIAAD/4QBARXhpZgAATU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAe6ADAAQAAAABAAAADAAAAAD/7QA4UGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAAAA4QklNBCUAAAAAABDUHYzZjwCyBOmACZjs+EJ+/8AAEQgADAB7AwEiAAIRAQMRAf/EAB8AAAEFAQEBAQEBAAAAAAAAAAABAgMEBQYHCAkKC//EALUQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+v/EAB8BAAMBAQEBAQEBAQEAAAAAAAABAgMEBQYHCAkKC//EALURAAIBAgQEAwQHBQQEAAECdwABAgMRBAUhMQYSQVEHYXETIjKBCBRCkaGxwQkjM1LwFWJy0QoWJDThJfEXGBkaJicoKSo1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoKDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uLj5OXm5+jp6vLz9PX29/j5+v/bAEMAHBwcHBwcMBwcMEQwMDBEXERERERcdFxcXFxcdIx0dHR0dHSMjIyMjIyMjKioqKioqMTExMTE3Nzc3Nzc3Nzc3P/bAEMBIiQkODQ4YDQ0YOacgJzm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5ubm5v/dAAQACP/aAAwDAQACEQMRAD8A2UZ5JJGLlQjY2jGMAA85Heojcs4G35cMQcd/k3DqBVwwxM/mMilvUjmhYYV+6ij6AUAVZppFtk8s4kcAA4z2yTikW5LsHGQGWPj0LMQavbVGCAOBge1NEcY6KPXp75/nzQBWN03lLJsz5h+UAknoTzgcdKklkIVQuQW/PsP5mpPJhwRsXBOTwOTQ8SOoU8Y6UARQSNIh3H6Hj/8AVUQeUhV3nDvhWwM7dufTHUenSrYjULsxkHrwOab5EITywi7TzjAxQBTS4kdkL7gu1M4xgljjnPOPpVmR3dCIdwYEdsHGecbhjpUvlxkhtoyvQ46U5lVxtcAj0NAFMyMfKkDt5ZA5wvJJGM8fyp6O6yuJS3cgYBGM9sDNT+XHkHaPl6cdPpShEUllABPU+tAFSeZ8ZjJUBGbkYyVxxyOnNOHmNcYWRsAZZflwM9B0z79asPGkmN6hscjIzTtq88D5uvvQBRV5ipGW+V8HgFgMZ7DHX9KtQO0kCSN1ZQTS+TDt2bF29cY4qTpwKAP/2Q=="
    }
  },
  "inspectConfig": 
  {
    "excludeInfoTypes": false,
    "infoTypes": 
    [
      {
        "name": "PHONE_NUMBER"
      }
    ],
    "minLikelihood": "POSSIBLE"
  }
}

URL:

POST https://dlp.googleapis.com/v2/{parent=projects/*}/content:inspect

Sample output:

{
 "result": {
  "findings": [
   {
    "infoType": {
     "name": "PHONE_NUMBER"
    },
    "likelihood": "VERY_LIKELY",
    "location": {
     "contentLocations": [
      {
       "imageLocation": {
        "boundingBoxes": [
         {
          "top": -2,
          "left": 54,
          "width": 15,
          "height": 14
         },
         {
          "top": -2,
          "left": 71,
          "width": 4,
          "height": 14
         },
         {
          "top": -2,
          "left": 74,
          "width": 18,
          "height": 14
         },
         {
          "top": -2,
          "left": 93,
          "width": 4,
          "height": 14
         },
         {
          "top": -2,
          "left": 99,
          "width": 21,
          "height": 14
         }
        ]
       }
      }
     ]
    },
    "createTime": "2018-05-04T20:07:24.793Z"
   }
  ]
 }
}

Java

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

/**
 * Inspect a local file
 *
 * <p>The file's MIME type is guessed from its name and mapped to a DLP {@code BytesType}; any
 * type that is not recognized is sent as {@code BYTES_TYPE_UNSPECIFIED}. Findings are printed to
 * standard output. Any exception raised along the way is caught and its message is printed to
 * standard output instead of being propagated.
 *
 * @param filePath The path to a local file to inspect. Can be a text, JPG, or PNG file.
 * @param minLikelihood The minimum likelihood required before returning a match
 * @param maxFindings The maximum number of findings to report (0 = server maximum)
 * @param infoTypes The infoTypes of information to match
 * @param customInfoTypes The custom infoTypes of information to match
 * @param includeQuote Whether to include the matching string
 * @param projectId Google Cloud project ID
 */
private static void inspectFile(
    String filePath,
    Likelihood minLikelihood,
    int maxFindings,
    List<InfoType> infoTypes,
    List<CustomInfoType> customInfoTypes,
    boolean includeQuote,
    String projectId) {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    // detect file mime type, default to application/octet-stream
    String mimeType = URLConnection.guessContentTypeFromName(filePath);
    if (mimeType == null) {
      mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath);
    }

    ByteContentItem.BytesType bytesType;
    switch (mimeType) {
      case "image/jpeg":
        bytesType = ByteContentItem.BytesType.IMAGE_JPEG;
        break;
      case "image/bmp":
        bytesType = ByteContentItem.BytesType.IMAGE_BMP;
        break;
      case "image/png":
        bytesType = ByteContentItem.BytesType.IMAGE_PNG;
        break;
      case "image/svg":
      case "image/svg+xml": // the registered MIME type for SVG; "image/svg" kept for compatibility
        bytesType = ByteContentItem.BytesType.IMAGE_SVG;
        break;
      default:
        bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED;
        break;
    }

    // Wrap the raw file bytes in a content item tagged with the detected type.
    byte[] data = Files.readAllBytes(Paths.get(filePath));
    ByteContentItem byteContentItem =
        ByteContentItem.newBuilder()
            .setType(bytesType)
            .setData(ByteString.copyFrom(data))
            .build();
    ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build();

    FindingLimits findingLimits =
        FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build();

    InspectConfig inspectConfig =
        InspectConfig.newBuilder()
            .addAllInfoTypes(infoTypes)
            .addAllCustomInfoTypes(customInfoTypes)
            .setMinLikelihood(minLikelihood)
            .setLimits(findingLimits)
            .setIncludeQuote(includeQuote)
            .build();

    InspectContentRequest request =
        InspectContentRequest.newBuilder()
            .setParent(ProjectName.of(projectId).toString())
            .setInspectConfig(inspectConfig)
            .setItem(contentItem)
            .build();

    InspectContentResponse response = dlpServiceClient.inspectContent(request);

    // Print one line per finding: optional quote, info type name, likelihood.
    InspectResult result = response.getResult();
    if (result.getFindingsCount() > 0) {
      System.out.println("Findings: ");
      for (Finding finding : result.getFindingsList()) {
        if (includeQuote) {
          System.out.print("\tQuote: " + finding.getQuote());
        }
        System.out.print("\tInfo type: " + finding.getInfoType().getName());
        System.out.println("\tLikelihood: " + finding.getLikelihood());
      }
    } else {
      System.out.println("No findings.");
    }
  } catch (Exception e) {
    System.out.println("Error in inspectFile: " + e.getMessage());
  }
}

Node.js

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Import other required libraries
const fs = require('fs');
const mime = require('mime');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const callingProjectId = process.env.GCLOUD_PROJECT;

// The path to a local file to inspect. Can be a text, JPG, or PNG file.
// const filepath = 'path/to/image.png';

// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';

// The maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;

// The infoTypes of information to match
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];

// Whether to include the matching string
// const includeQuote = true;

// Construct file data to inspect
// Maps a MIME type to the numeric byte-item type sent in the request; any
// type not listed here is sent as 0 (unspecified).
const bytesTypeByMimeType = {
  'image/jpeg': 1,
  'image/bmp': 2,
  'image/png': 3,
  'image/svg': 4,
  // The registered MIME type for SVG; without it SVG files would be sent as
  // unspecified.
  'image/svg+xml': 4,
};
const fileTypeConstant = bytesTypeByMimeType[mime.getType(filepath)] || 0;
// readFileSync already returns a Buffer, so it can be base64-encoded directly.
const fileBytes = fs.readFileSync(filepath).toString('base64');
const item = {
  byteItem: {
    type: fileTypeConstant,
    data: fileBytes,
  },
};

// Construct request
const request = {
  parent: dlp.projectPath(callingProjectId),
  inspectConfig: {
    infoTypes: infoTypes,
    minLikelihood: minLikelihood,
    includeQuote: includeQuote,
    limits: {
      maxFindingsPerRequest: maxFindings,
    },
  },
  item: item,
};

// Run request
dlp
  .inspectContent(request)
  .then(response => {
    // The inspection result is the first element of the resolved response.
    const findings = response[0].result.findings;
    if (findings.length > 0) {
      console.log(`Findings:`);
      findings.forEach(finding => {
        if (includeQuote) {
          console.log(`\tQuote: ${finding.quote}`);
        }
        console.log(`\tInfo type: ${finding.infoType.name}`);
        console.log(`\tLikelihood: ${finding.likelihood}`);
      });
    } else {
      console.log(`No findings.`);
    }
  })
  .catch(err => {
    console.log(`Error in inspectFile: ${err.message || err}`);
  });

Python

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

def inspect_file(project, filename, info_types, min_likelihood=None,
                 custom_dictionaries=None, custom_regexes=None,
                 max_findings=None, include_quote=True, mime_type=None):
    """Uses the Data Loss Prevention API to analyze a file for protected data.
    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the file to inspect.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
            If empty, FIRST_NAME, LAST_NAME and EMAIL_ADDRESS are used.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        custom_dictionaries: A list of strings, each a comma-separated word
            list. Entry i becomes a custom info type named
            'CUSTOM_DICTIONARY_<i>'. Defaults to no custom dictionaries.
        custom_regexes: A list of regex pattern strings. Entry i becomes a
            custom info type named 'CUSTOM_REGEX_<i>'. Defaults to no custom
            regexes.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        include_quote: Boolean for whether to display a quote of the detected
            information in the results.
        mime_type: The MIME type of the file. If not specified, the type is
            inferred via the Python standard library's mimetypes module.
    Returns:
        None; the response from the API is printed to the terminal.
    """

    import mimetypes

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    if not info_types:
        info_types = ['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS']
    info_types = [{'name': info_type} for info_type in info_types]

    # Prepare custom_info_types by parsing the dictionary word lists and
    # regex patterns.
    if custom_dictionaries is None:
        custom_dictionaries = []
    dictionaries = [{
        'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
        'dictionary': {
            'word_list': {'words': custom_dict.split(',')}
        }
    } for i, custom_dict in enumerate(custom_dictionaries)]
    if custom_regexes is None:
        custom_regexes = []
    regexes = [{
        'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
        'regex': {'pattern': custom_regex}
    } for i, custom_regex in enumerate(custom_regexes)]
    custom_info_types = dictionaries + regexes

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        'info_types': info_types,
        'custom_info_types': custom_info_types,
        'min_likelihood': min_likelihood,
        # Forward the caller's choice; without this key the include_quote
        # argument would have no effect on the request.
        'include_quote': include_quote,
        'limits': {'max_findings_per_request': max_findings},
    }

    # If mime_type is not specified, guess it from the filename.
    if mime_type is None:
        mime_guess = mimetypes.MimeTypes().guess_type(filename)
        mime_type = mime_guess[0]

    # Select the content type index from the list of supported types.
    supported_content_types = {
        None: 0,  # "Unspecified"
        'image/jpeg': 1,
        'image/bmp': 2,
        'image/png': 3,
        'image/svg': 4,
        'image/svg+xml': 4,  # the registered MIME type for SVG
        'text/plain': 5,
    }
    content_type_index = supported_content_types.get(mime_type, 0)

    # Construct the item, containing the file's byte data.
    with open(filename, mode='rb') as f:
        item = {'byte_item': {'type': content_type_index, 'data': f.read()}}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    if response.result.findings:
        for finding in response.result.findings:
            try:
                print('Quote: {}'.format(finding.quote))
            except AttributeError:
                pass
            print('Info type: {}'.format(finding.info_type.name))
            print('Likelihood: {}'.format(finding.likelihood))
    else:
        print('No findings.')

Go

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// inspectFile reads everything from input, submits it to the DLP API as a byte item of
// the given bytesType, and writes the inspection result to w. The request looks for the
// named infoTypes plus one custom info type per entry of customDictionaries (a
// comma-separated word list) and customRegexes (a pattern). A read failure or an API
// error is reported through log.Fatal.
func inspectFile(w io.Writer, client *dlp.Client, project string, minLikelihood dlppb.Likelihood, maxFindings int32, includeQuote bool, infoTypes []string, customDictionaries []string, customRegexes []string, bytesType dlppb.ByteContentItem_BytesType, input io.Reader) {
	// Wrap each requested info type name in an InfoType message.
	var builtinTypes []*dlppb.InfoType
	for _, name := range infoTypes {
		builtinTypes = append(builtinTypes, &dlppb.InfoType{Name: name})
	}

	// Build the custom info types: dictionaries first, then regexes, each named after
	// its position in the corresponding argument.
	var customTypes []*dlppb.CustomInfoType
	for n, csv := range customDictionaries {
		wordList := &dlppb.CustomInfoType_Dictionary_WordList{
			Words: strings.Split(csv, ","),
		}
		dictionary := &dlppb.CustomInfoType_Dictionary{
			Source: &dlppb.CustomInfoType_Dictionary_WordList_{WordList: wordList},
		}
		customTypes = append(customTypes, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_DICTIONARY_%d", n)},
			Type:     &dlppb.CustomInfoType_Dictionary_{Dictionary: dictionary},
		})
	}
	for n, pattern := range customRegexes {
		regex := &dlppb.CustomInfoType_Regex{Pattern: pattern}
		customTypes = append(customTypes, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_REGEX_%d", n)},
			Type:     &dlppb.CustomInfoType_Regex_{Regex: regex},
		})
	}

	// Load the whole payload into memory; the request carries it as raw bytes.
	payload, err := ioutil.ReadAll(input)
	if err != nil {
		log.Fatalf("error reading file: %v", err)
	}

	// Assemble the request from its two parts: what to look for and what to look in.
	config := &dlppb.InspectConfig{
		InfoTypes:       builtinTypes,
		CustomInfoTypes: customTypes,
		MinLikelihood:   minLikelihood,
		Limits: &dlppb.InspectConfig_FindingLimits{
			MaxFindingsPerRequest: maxFindings,
		},
		IncludeQuote: includeQuote,
	}
	item := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_ByteItem{
			ByteItem: &dlppb.ByteContentItem{
				Type: bytesType,
				Data: payload,
			},
		},
	}
	req := &dlppb.InspectContentRequest{
		Parent:        "projects/" + project,
		InspectConfig: config,
		Item:          item,
	}

	// Send the request and print whatever result comes back.
	resp, err := client.InspectContent(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Fprintln(w, resp.GetResult())
}

PHP

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

use Google\Cloud\Dlp\V2\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\InspectConfig_FindingLimits;
use Google\Cloud\Dlp\V2\ByteContentItem;

/**
 * Inspect a local file.
 *
 * Looks for PERSON_NAME and PHONE_NUMBER infoTypes in the file's contents and
 * prints each finding (quote, info type and likelihood) to standard output.
 *
 * @param string $callingProjectId The project ID to run the API call under
 * @param string $path The file path to the file to inspect
 * @param int $maxFindings (Optional) The maximum number of findings to report per request (0 = server maximum)
 */
function inspect_file(
    $callingProjectId,
    $path,
    $maxFindings = 0
) {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // The infoTypes of information to match
    $usNameInfoType = (new InfoType())
        ->setName('PERSON_NAME');
    $phoneNumberInfoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $infoTypes = [$usNameInfoType, $phoneNumberInfoType];

    // The minimum likelihood required before returning a match
    $minLikelihood = Likelihood::LIKELIHOOD_UNSPECIFIED;

    // Whether to include the matching string in the response
    $includeQuote = true;

    // Specify finding limits
    $limits = (new InspectConfig_FindingLimits())
        ->setMaxFindingsPerRequest($maxFindings);

    // Create the configuration object
    $inspectConfig = (new InspectConfig())
        ->setMinLikelihood($minLikelihood)
        ->setLimits($limits)
        ->setInfoTypes($infoTypes)
        ->setIncludeQuote($includeQuote);

    // Create the content item objects.
    // Map the detected MIME type to the numeric byte-item type; anything not
    // listed (or a failed detection, which yields false) is sent as 0.
    $bytesTypes = [
        'image/jpeg' => 1,
        'image/bmp' => 2,
        'image/png' => 3,
        'image/svg' => 4,
        // The registered MIME type for SVG.
        'image/svg+xml' => 4,
    ];
    $mimeType = mime_content_type($path);
    $typeConstant = (is_string($mimeType) && isset($bytesTypes[$mimeType]))
        ? $bytesTypes[$mimeType]
        : 0;

    $byteContent = (new ByteContentItem())
        ->setType($typeConstant)
        ->setData(file_get_contents($path));

    $content = (new ContentItem())
        ->setByteItem($byteContent);

    $parent = $dlp->projectName($callingProjectId);

    // Run request
    $response = $dlp->inspectContent($parent, [
        'inspectConfig' => $inspectConfig,
        'item' => $content
    ]);

    // Display names indexed by the numeric likelihood value of a finding.
    $likelihoods = ['Unknown', 'Very unlikely', 'Unlikely', 'Possible',
                    'Likely', 'Very likely'];

    // Print the results
    $findings = $response->getResult()->getFindings();
    if (count($findings) == 0) {
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            if ($includeQuote) {
                print('  Quote: ' . $finding->getQuote() . PHP_EOL);
            }
            print('  Info type: ' . $finding->getInfoType()->getName() . PHP_EOL);
            print('  Likelihood: ' . $likelihoods[$finding->getLikelihood()] . PHP_EOL);
        }
    }
}

Ruby

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

# project_id   = "Your Google Cloud project ID"
# filename     = "The file path to the file to inspect"
# max_findings = "Maximum number of findings to report per request (0 = server maximum)"

require "google/cloud/dlp"

dlp = Google::Cloud::Dlp.new
inspect_config = {
  # The types of information to match
  info_types: [{name: "PERSON_NAME"}, {name: "PHONE_NUMBER"}],

  # Only return results above a likelihood threshold (0 for all)
  min_likelihood: :POSSIBLE,

  # Limit the number of findings (0 for no limit)
  limits: { max_findings_per_request: max_findings },

  # Whether to include the matching string in the response
  include_quote: true
}

# The item to inspect.
# File.binread reads the whole file in binary mode and closes it again, so no
# file handle is left open after the contents have been loaded.
file_contents = File.binread filename
item_to_inspect = { byte_item: { type: :BYTES_TYPE_UNSPECIFIED, data: file_contents } }

# Run request
parent = "projects/#{project_id}"
response = dlp.inspect_content parent,
  inspect_config: inspect_config,
  item:           item_to_inspect

# Print the results
if response.result.findings.empty?
  puts "No findings"
else
  response.result.findings.each do |finding|
    puts "Quote:      #{finding.quote}"
    puts "Info type:  #{finding.info_type.name}"
    puts "Likelihood: #{finding.likelihood}"
  end
end

C#

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Lookup from a lower-case file extension (including the leading dot) to the
// byte-item type used when submitting that file for inspection.
static readonly Dictionary<string, ByteContentItem.Types.BytesType> s_fileTypes =
    new Dictionary<string, ByteContentItem.Types.BytesType>
    {
        [".bmp"] = ByteContentItem.Types.BytesType.ImageBmp,
        [".jpg"] = ByteContentItem.Types.BytesType.ImageJpeg,
        [".jpeg"] = ByteContentItem.Types.BytesType.ImageJpeg,
        [".png"] = ByteContentItem.Types.BytesType.ImagePng,
        [".svg"] = ByteContentItem.Types.BytesType.ImageSvg,
        [".txt"] = ByteContentItem.Types.BytesType.TextUtf8,
    };

/// <summary>
/// Inspects a local file with the DLP API and writes the findings to the console.
/// </summary>
/// <param name="projectId">Google Cloud project ID used as the request parent.</param>
/// <param name="file">Path of the file to inspect. Its extension selects the byte-item
/// type; extensions not present in <c>s_fileTypes</c> are sent as unspecified.</param>
/// <param name="minLikelihood">Name of a <c>Likelihood</c> enum value; parsed with
/// <c>Enum.Parse</c>.</param>
/// <param name="maxFindings">Maximum number of findings to report per request.</param>
/// <param name="includeQuote">Whether to request and print the matching string.</param>
/// <param name="infoTypes">The infoTypes to look for.</param>
/// <param name="customInfoTypes">The custom infoTypes to look for.</param>
/// <returns>Always 0.</returns>
public static object InspectFile(
    string projectId,
    string file,
    string minLikelihood,
    int maxFindings,
    bool includeQuote,
    IEnumerable<InfoType> infoTypes,
    IEnumerable<CustomInfoType> customInfoTypes)
{
    // Open read-only: the file is only read, and requesting write access would make
    // inspection fail for files the caller may read but not modify. The using
    // statement closes the stream on every exit path.
    using (var fileStream = File.OpenRead(file))
    {
        var inspectConfig = new InspectConfig
        {
            MinLikelihood = (Likelihood)System.Enum.Parse(typeof(Likelihood), minLikelihood),
            Limits = new FindingLimits
            {
                MaxFindingsPerRequest = maxFindings
            },
            IncludeQuote = includeQuote,
            InfoTypes = { infoTypes },
            CustomInfoTypes = { customInfoTypes }
        };
        DlpServiceClient dlp = DlpServiceClient.Create();
        InspectContentResponse response = dlp.InspectContent(new InspectContentRequest
        {
            ParentAsProjectName = new Google.Cloud.Dlp.V2.ProjectName(projectId),
            Item = new ContentItem
            {
                ByteItem = new ByteContentItem
                {
                    Data = ByteString.FromStream(fileStream),
                    // Culture-invariant lowercasing keeps the extension lookup
                    // independent of the current locale.
                    Type = s_fileTypes.GetValueOrDefault(
                            new FileInfo(file).Extension.ToLowerInvariant(),
                            ByteContentItem.Types.BytesType.Unspecified
                    )
                }
            },
            InspectConfig = inspectConfig
        });

        var findings = response.Result.Findings;
        if (findings.Count > 0)
        {
            Console.WriteLine("Findings:");
            foreach (var finding in findings)
            {
                if (includeQuote)
                {
                    Console.WriteLine($"  Quote: {finding.Quote}");
                }
                Console.WriteLine($"  InfoType: {finding.InfoType}");
                Console.WriteLine($"  Likelihood: {finding.Likelihood}");
            }
        }
        else
        {
            Console.WriteLine("No findings.");
        }

        return 0;
    }
}

Was this page helpful? Let us know how we did:

Send feedback about...

Data Loss Prevention API