Inspecting Text for Sensitive Data

The DLP API can detect and classify sensitive data. Given text input, the API returns, for each match it finds, the InfoType that was detected, a likelihood value, and the offsets of the match within the text.

Inspecting a String

You can use the code samples below to easily check a string for sensitive data. You can also feed information to the API using JSON over HTTP.

Protocol

Protocol

See the JSON quickstart for more information on using JSON.

Sample Input:

{
  "items":
  [
    {
      "type": "text/plain",
      "value": "My phone number is (415) 555-0890"
    }
  ],
  "inspectConfig":
  {
    "includeQuote": true,
    "minLikelihood": "POSSIBLE"
  }
}

URL:

  POST https://dlp.googleapis.com/v2beta1/content:inspect

Sample Output:

[
  {
    "quote": "(415) 555-0890",
    "infoType": {
      "name": "PHONE_NUMBER"
    },
    "likelihood": "POSSIBLE",
    "location": {
      "byteRange": {
        "start": "19",
        "end": "33"
      },
      "codepointRange": {
        "start": "19",
        "end": "33"
      }
    },
    "createTime": "1970-01-18T05:28:03.541776Z"
  }
]

Java

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Open a DLP client; try-with-resources closes it when the block exits
try (DlpServiceClient dlp = DlpServiceClient.create()) {

  // Inputs this snippet expects to be defined before it runs:
  //   string        - the text to inspect,
  //                   e.g. "My name is Gary and my email is gary@example.com"
  //   infoTypes     - the infoTypes of information to match
  //                   (e.g. US_MALE_NAME and US_FEMALE_NAME)
  //   minLikelihood - the minimum likelihood required before returning a match
  //                   (e.g. LIKELIHOOD_UNSPECIFIED)
  //   maxFindings   - the maximum number of findings to report
  //                   (0 = server maximum)
  //   includeQuote  - whether to include the matching string (e.g. true)

  // Wrap the text in a plain-text content item
  ContentItem textItem =
      ContentItem.newBuilder().setType("text/plain").setValue(string).build();

  // Describe what to look for and how results should be reported
  InspectConfig config =
      InspectConfig.newBuilder()
          .addAllInfoTypes(infoTypes)
          .setMinLikelihood(minLikelihood)
          .setMaxFindings(maxFindings)
          .setIncludeQuote(includeQuote)
          .build();

  // Send the inspection request
  InspectContentRequest inspectRequest =
      InspectContentRequest.newBuilder()
          .setInspectConfig(config)
          .addItems(textItem)
          .build();
  InspectContentResponse inspectResponse = dlp.inspectContent(inspectRequest);

  // Report each result: either its findings or a "no findings" notice
  for (InspectResult itemResult : inspectResponse.getResultsList()) {
    if (itemResult.getFindingsCount() <= 0) {
      System.out.println("No findings.");
      continue;
    }

    System.out.println("Findings: ");
    for (Finding match : itemResult.getFindingsList()) {
      // One output line per finding: optional quote, then info type and likelihood
      if (includeQuote) {
        System.out.print("Quote: " + match.getQuote());
      }
      System.out.print("\tInfo type: " + match.getInfoType().getName());
      System.out.println("\tLikelihood: " + match.getLikelihood());
    }
  }
} catch (Exception e) {
  System.out.println("Error in inspectString: " + e.getMessage());
}

Node.js

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = DLP();

// The values below must be defined before this snippet runs; the commented
// lines show example values.

// The string to inspect
// const string = 'My name is Gary and my email is gary@example.com';

// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';

// The maximum number of findings to report (0 = server maximum)
// const maxFindings = 0;

// The infoTypes of information to match. Use one object per infoType: a
// single object literal with two `name` keys keeps only the last one.
// const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];

// Whether to include the matching string
// const includeQuote = true;

// Construct items to inspect
const items = [{ type: 'text/plain', value: string }];

// Construct request
const request = {
  inspectConfig: {
    infoTypes,
    minLikelihood,
    maxFindings,
    includeQuote,
  },
  items,
};

// Run request
dlp.inspectContent(request)
  .then((response) => {
    // Only one item was submitted, so read the findings of the first result
    const findings = response[0].results[0].findings;
    if (findings.length > 0) {
      console.log(`Findings:`);
      findings.forEach((finding) => {
        if (includeQuote) {
          console.log(`\tQuote: ${finding.quote}`);
        }
        console.log(`\tInfo type: ${finding.infoType.name}`);
        console.log(`\tLikelihood: ${finding.likelihood}`);
      });
    } else {
      console.log(`No findings.`);
    }
  })
  .catch((err) => {
    console.log(`Error in inspectString: ${err.message || err}`);
  });

PHP

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

use Google\Cloud\Dlp\V2beta1\DlpServiceClient;
use Google\Privacy\Dlp\V2beta1\ContentItem;
use Google\Privacy\Dlp\V2beta1\InfoType;
use Google\Privacy\Dlp\V2beta1\InspectConfig;
use Google\Privacy\Dlp\V2beta1\Likelihood;

/**
 * Inspect a string using the Data Loss Prevention (DLP) API.
 *
 * Searches the text for the US_MALE_NAME and US_FEMALE_NAME infoTypes and
 * prints each finding (quote, info type and likelihood), or 'No findings.'
 * when nothing matched.
 *
 * @param string $string        The text to inspect
 * @param int    $minLikelihood The minimum likelihood required before returning
 *                              a match (a Likelihood constant)
 * @param int    $maxFindings   The maximum number of findings to report
 *                              (0 = server maximum)
 */
function inspect_string(
    $string,
    $minLikelihood = Likelihood::LIKELIHOOD_UNSPECIFIED,
    $maxFindings = 0)
{
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // The infoTypes of information to match
    $usMaleNameInfoType = new InfoType();
    $usMaleNameInfoType->setName('US_MALE_NAME');
    $usFemaleNameInfoType = new InfoType();
    $usFemaleNameInfoType->setName('US_FEMALE_NAME');
    $infoTypes = [$usMaleNameInfoType, $usFemaleNameInfoType];

    // Whether to include the matching string in the response
    $includeQuote = true;

    // Create the configuration object
    $inspectConfig = new InspectConfig();
    $inspectConfig->setMinLikelihood($minLikelihood);
    $inspectConfig->setMaxFindings($maxFindings);
    $inspectConfig->setInfoTypes($infoTypes);
    $inspectConfig->setIncludeQuote($includeQuote);

    // Wrap the text in a plain-text content item
    $content = new ContentItem();
    $content->setType('text/plain');
    $content->setValue($string);

    // Run request
    $response = $dlp->inspectContent($inspectConfig, [$content]);

    // Human-readable labels, indexed by the likelihood value of a finding
    $likelihoods = ['Unknown', 'Very unlikely', 'Unlikely', 'Possible',
                    'Likely', 'Very likely'];

    // Print the results (a single item was sent, so only the first result is read)
    $findings = $response->getResults()[0]->getFindings();
    if (count($findings) == 0) {
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            if ($includeQuote) {
                print('  Quote: ' . $finding->getQuote() . PHP_EOL);
            }
            print('  Info type: ' . $finding->getInfoType()->getName() . PHP_EOL);
            $likelihoodString = $likelihoods[$finding->getLikelihood()];
            print('  Likelihood: ' . $likelihoodString . PHP_EOL);
        }
    }
}

Inspecting a Text File

Java

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Instantiates a client; try-with-resources closes it when the block exits
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
  // The values below must be defined before this snippet runs; the commented
  // lines show example values.

  // The path to a local file to inspect. Can be a text, JPG, or PNG file.
  // filePath = "path/to/image.png";

  // The minimum likelihood required before returning a match
  // minLikelihood = LIKELIHOOD_UNSPECIFIED;

  // The maximum number of findings to report (0 = server maximum)
  // maxFindings = 0;

  // The infoTypes of information to match
  // infoTypes = the InfoType objects to look for, e.g. US_MALE_NAME and US_FEMALE_NAME

  // Whether to include the matching string
  // includeQuote = true;
  Path path = Paths.get(filePath);

  // detect file mime type, default to application/octet-stream
  // (first guess from the file name, then ask the default file type map)
  String mimeType = URLConnection.guessContentTypeFromName(filePath);
  if (mimeType == null) {
    mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath);
  }
  if (mimeType == null) {
    mimeType = "application/octet-stream";
  }

  // Send the raw file bytes together with the detected mime type
  byte[] data = Files.readAllBytes(path);
  ContentItem contentItem =
      ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build();

  // Describe what to look for and how results should be reported
  InspectConfig inspectConfig =
      InspectConfig.newBuilder()
          .addAllInfoTypes(infoTypes)
          .setMinLikelihood(minLikelihood)
          .setMaxFindings(maxFindings)
          .setIncludeQuote(includeQuote)
          .build();

  InspectContentRequest request =
      InspectContentRequest.newBuilder()
          .setInspectConfig(inspectConfig)
          .addItems(contentItem)
          .build();
  InspectContentResponse response = dlpServiceClient.inspectContent(request);

  // Print the findings of every result, or a notice when a result has none
  for (InspectResult result : response.getResultsList()) {
    if (result.getFindingsCount() > 0) {
      System.out.println("Findings: ");
      for (Finding finding : result.getFindingsList()) {
        if (includeQuote) {
          System.out.print("Quote: " + finding.getQuote());
        }
        System.out.print("\tInfo type: " + finding.getInfoType().getName());
        System.out.println("\tLikelihood: " + finding.getLikelihood());
      }
    } else {
      System.out.println("No findings.");
    }
  }
} catch (Exception e) {
  e.printStackTrace();
  System.out.println("Error in inspectFile: " + e.getMessage());
}

Node.js

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Imports used to read the file and to look up its MIME type
const fs = require('fs');
const mime = require('mime');

// Instantiates a client
const dlp = DLP();

// The values below must be defined before this snippet runs; the commented
// lines show example values.

// The path to a local file to inspect. Can be a text, JPG, or PNG file.
// const filepath = 'path/to/image.png';

// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';

// The maximum number of findings to report (0 = server maximum)
// const maxFindings = 0;

// The infoTypes of information to match
// const infoTypes = [{ name: 'US_MALE_NAME' }, { name: 'US_FEMALE_NAME' }];

// Whether to include the matching string
// const includeQuote = true;

// Construct file data to inspect: the file's bytes are sent base64-encoded,
// tagged with the MIME type looked up from the path (generic binary fallback).
// readFileSync without an encoding already returns a Buffer.
const fileItems = [{
  type: mime.lookup(filepath) || 'application/octet-stream',
  data: fs.readFileSync(filepath).toString('base64'),
}];

// Construct request
const request = {
  inspectConfig: {
    infoTypes,
    minLikelihood,
    maxFindings,
    includeQuote,
  },
  items: fileItems,
};

// Run request
dlp.inspectContent(request)
  .then((response) => {
    // Only one item was submitted, so read the findings of the first result
    const findings = response[0].results[0].findings;
    if (findings.length > 0) {
      console.log(`Findings:`);
      findings.forEach((finding) => {
        if (includeQuote) {
          console.log(`\tQuote: ${finding.quote}`);
        }
        console.log(`\tInfo type: ${finding.infoType.name}`);
        console.log(`\tLikelihood: ${finding.likelihood}`);
      });
    } else {
      console.log(`No findings.`);
    }
  })
  .catch((err) => {
    console.log(`Error in inspectFile: ${err.message || err}`);
  });

PHP

For more on installing and creating a DLP API client, refer to DLP API Client Libraries.

use Google\Cloud\Dlp\V2beta1\DlpServiceClient;
use Google\Privacy\Dlp\V2beta1\ContentItem;
use Google\Privacy\Dlp\V2beta1\InfoType;
use Google\Privacy\Dlp\V2beta1\InspectConfig;
use Google\Privacy\Dlp\V2beta1\Likelihood;

/**
 * Inspect a file using the Data Loss Prevention (DLP) API.
 *
 * Searches the file contents for the US_MALE_NAME and US_FEMALE_NAME
 * infoTypes and prints each finding (quote, info type and likelihood), or
 * 'No findings.' when nothing matched.
 *
 * @param string $path          The file path to the file to inspect
 * @param int    $minLikelihood The minimum likelihood required before returning
 *                              a match (a Likelihood constant)
 * @param int    $maxFindings   The maximum number of findings to report
 *                              (0 = server maximum)
 */
function inspect_file(
    $path,
    $minLikelihood = Likelihood::LIKELIHOOD_UNSPECIFIED,
    $maxFindings = 0)
{
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // The infoTypes of information to match
    $usMaleNameInfoType = new InfoType();
    $usMaleNameInfoType->setName('US_MALE_NAME');
    $usFemaleNameInfoType = new InfoType();
    $usFemaleNameInfoType->setName('US_FEMALE_NAME');
    $infoTypes = [$usMaleNameInfoType, $usFemaleNameInfoType];

    // Whether to include the matching string in the response
    $includeQuote = true;

    // Create the configuration object
    $inspectConfig = new InspectConfig();
    $inspectConfig->setMinLikelihood($minLikelihood);
    $inspectConfig->setMaxFindings($maxFindings);
    $inspectConfig->setInfoTypes($infoTypes);
    $inspectConfig->setIncludeQuote($includeQuote);

    // Construct file data to inspect: raw file contents tagged with the
    // detected MIME type, falling back to a generic binary type
    $content = new ContentItem();
    $content->setType(mime_content_type($path) ?: 'application/octet-stream');
    $content->setData(file_get_contents($path));

    // Run request
    $response = $dlp->inspectContent($inspectConfig, [$content]);

    // Human-readable labels, indexed by the likelihood value of a finding
    $likelihoods = ['Unknown', 'Very unlikely', 'Unlikely', 'Possible',
                    'Likely', 'Very likely'];

    // Print the results (a single item was sent, so only the first result is read)
    $findings = $response->getResults()[0]->getFindings();
    if (count($findings) == 0) {
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            if ($includeQuote) {
                print('  Quote: ' . $finding->getQuote() . PHP_EOL);
            }
            print('  Info type: ' . $finding->getInfoType()->getName() . PHP_EOL);
            $likelihoodString = $likelihoods[$finding->getLikelihood()];
            print('  Likelihood: ' . $likelihoodString . PHP_EOL);
        }
    }
}

Monitor your resources on the go

Get the Google Cloud Console app to help you manage your projects.

Send feedback about...

Data Loss Prevention API