テキストに含まれる機密データの検査

Cloud Data Loss Prevention(DLP)を使用すると、テキストに含まれる機密データを検出し、分類できます。テキストを渡すと、Cloud DLP API がテキスト内で検出した infoType、可能性の値、オフセット情報を返します。

テキスト文字列の検査

次に、Cloud DLP API を使用してテキスト文字列内の機密データを検査する方法を示す JSON とコードの例をいくつかの言語で示します。

プロトコル

JSON で Cloud DLP API を使用する方法については、JSON クイックスタートをご覧ください。

JSON 入力:

POST https://dlp.googleapis.com/v2/projects/[PROJECT_ID]/content:inspect?key={YOUR_API_KEY}

{
  "item":{
    "value":"My phone number is (415) 555-0890"
  },
  "inspectConfig":{
    "includeQuote":true,
    "minLikelihood":"POSSIBLE",
    "infoTypes":[
      {
        "name":"PHONE_NUMBER"
      }
    ]
  }
}

JSON 出力:

{
  "result":{
    "findings":[
      {
        "quote":"(415) 555-0890",
        "infoType":{
          "name":"PHONE_NUMBER"
        },
        "likelihood":"VERY_LIKELY",
        "location":{
          "byteRange":{
            "start":"19",
            "end":"33"
          },
          "codepointRange":{
            "start":"19",
            "end":"33"
          }
        },
        "createTime":"2018-11-13T19:29:15.412Z"
      }
    ]
  }
}

Java

import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.ProjectName;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.List;

public class InspectString {

  // Inspects the provided text.
  public static void inspectString(String projectId, String textToInspect) {
    // String projectId = "my-project-id";
    // String textToInspect = "My name is Gary and my email is gary@example.com";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the project used for request.
      ProjectName project = ProjectName.of(projectId);

      // Specify the type and content to be inspected.
      ByteContentItem byteItem = ByteContentItem.newBuilder()
          .setType(BytesType.TEXT_UTF8)
          .setData(ByteString.copyFromUtf8(textToInspect))
          .build();
      ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();

      // Specify the type of info the inspection will look for.
      List<InfoType> infoTypes = new ArrayList<>();
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      for (String typeName : new String[] {"PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }

      // Construct the configuration for the Inspect request.
      InspectConfig config = InspectConfig.newBuilder()
          .addAllInfoTypes(infoTypes)
          .setIncludeQuote(true)
          .build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request = InspectContentRequest.newBuilder()
          .setParent(project.toString())
          .setItem(item)
          .setInspectConfig(config)
          .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    } catch (Exception e) {
      System.out.println("Error during inspectString: \n" + e.toString());
    }
  }
}

Node.js

// Load the Google Cloud Data Loss Prevention client library
const DLP = require('@google-cloud/dlp');

// Create the client used for every call below
const dlp = new DLP.DlpServiceClient();

// The snippet expects the following values to be defined by the caller.

// Project ID the API call runs under
// const callingProjectId = process.env.GCLOUD_PROJECT;

// Text to inspect
// const string = 'My name is Gary and my email is gary@example.com';

// Minimum likelihood a match needs before it is returned
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';

// Maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;

// Built-in infoTypes to match
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];

// Custom infoTypes to match
// const customInfoTypes = [{ name: 'DICT_TYPE', dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
//   { name: 'REGEX_TYPE', regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}];

// Whether the matching string is included in each finding
// const includeQuote = true;

// The content item carries the raw text
const item = {value: string};

// Inspection settings, kept separate so the request below stays short
const inspectConfig = {
  infoTypes,
  customInfoTypes,
  minLikelihood,
  includeQuote,
  limits: {maxFindingsPerRequest: maxFindings},
};

// Full request: parent project, inspection settings and the item
const request = {
  parent: dlp.projectPath(callingProjectId),
  inspectConfig,
  item,
};

// Send the request and print each finding
try {
  const [response] = await dlp.inspectContent(request);
  const findings = response.result.findings;
  if (findings.length === 0) {
    console.log('No findings.');
  } else {
    console.log('Findings:');
    for (const finding of findings) {
      // The quote is only printed when it was requested
      if (includeQuote) {
        console.log(`\tQuote: ${finding.quote}`);
      }
      console.log(`\tInfo type: ${finding.infoType.name}`);
      console.log(`\tLikelihood: ${finding.likelihood}`);
    }
  }
} catch (err) {
  console.log(`Error in inspectString: ${err.message || err}`);
}

Python

def inspect_string(project, content_string, info_types,
                   custom_dictionaries=None, custom_regexes=None,
                   min_likelihood=None, max_findings=None, include_quote=True):
    """Inspects a string with the Data Loss Prevention API and prints findings.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.
        info_types: A list of strings naming the info types to look for.
        custom_dictionaries: Optional list of strings; each string is split
            on ',' into the word list of a custom info type named
            'CUSTOM_DICTIONARY_<index>'.
        custom_regexes: Optional list of regex pattern strings; each becomes
            a custom info type named 'CUSTOM_REGEX_<index>'.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        include_quote: Boolean for whether to request a quote of the detected
            information in the results.

    Returns:
        None; the findings are printed to the terminal.
    """

    # The client library is imported lazily, inside the function.
    import google.cloud.dlp

    client = google.cloud.dlp.DlpServiceClient()

    # The API takes info types as dictionaries (or protos), not bare names.
    built_in_types = [{'name': type_name} for type_name in info_types]

    # Custom info types: dictionaries first, then regexes, each numbered by
    # its position in its own input list.
    word_list_specs = [] if custom_dictionaries is None else custom_dictionaries
    custom_types = []
    for index, words_csv in enumerate(word_list_specs):
        custom_types.append({
            'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(index)},
            'dictionary': {'word_list': {'words': words_csv.split(',')}},
        })
    pattern_specs = [] if custom_regexes is None else custom_regexes
    for index, pattern in enumerate(pattern_specs):
        custom_types.append({
            'info_type': {'name': 'CUSTOM_REGEX_{}'.format(index)},
            'regex': {'pattern': pattern},
        })

    # Keys whose value is None may optionally be omitted entirely.
    limits = {'max_findings_per_request': max_findings}
    inspect_config = {
        'info_types': built_in_types,
        'custom_info_types': custom_types,
        'min_likelihood': min_likelihood,
        'include_quote': include_quote,
        'limits': limits,
    }

    # The item wraps the raw text; the parent is the full project resource id.
    item = {'value': content_string}
    parent = client.project_path(project)

    response = client.inspect_content(parent, inspect_config, item)

    findings = response.result.findings
    if not findings:
        print('No findings.')
        return

    for finding in findings:
        # A finding may lack a quote (or carry an empty one); skip it then.
        quote = getattr(finding, 'quote', None)
        if quote:
            print('Quote: {}'.format(quote))
        print('Info type: {}'.format(finding.info_type.name))
        print('Likelihood: {}'.format(finding.likelihood))

Go

// inspectString inspects input for the given built-in infoTypes plus the custom
// dictionaries and regexes, and writes the inspection result to w.
//
// Each entry of customDictionaries is a comma-separated word list; entries are
// named CUSTOM_DICTIONARY_<index> and regexes CUSTOM_REGEX_<index>. A failed
// API call terminates the program through log.Fatal.
func inspectString(w io.Writer, client *dlp.Client, project string, minLikelihood dlppb.Likelihood, maxFindings int32, includeQuote bool, infoTypes []string, customDictionaries []string, customRegexes []string, input string) {
	// Wrap every built-in infoType name in its proto message.
	var builtIn []*dlppb.InfoType
	for _, name := range infoTypes {
		builtIn = append(builtIn, &dlppb.InfoType{Name: name})
	}

	// Custom infoTypes: dictionaries first, then regexes.
	var custom []*dlppb.CustomInfoType
	for n, csv := range customDictionaries {
		wordList := &dlppb.CustomInfoType_Dictionary_WordList{
			Words: strings.Split(csv, ","),
		}
		dictionary := &dlppb.CustomInfoType_Dictionary{
			Source: &dlppb.CustomInfoType_Dictionary_WordList_{WordList: wordList},
		}
		custom = append(custom, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_DICTIONARY_%d", n)},
			Type:     &dlppb.CustomInfoType_Dictionary_{Dictionary: dictionary},
		})
	}
	for n, pattern := range customRegexes {
		regex := &dlppb.CustomInfoType_Regex{Pattern: pattern}
		custom = append(custom, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_REGEX_%d", n)},
			Type:     &dlppb.CustomInfoType_Regex_{Regex: regex},
		})
	}

	// Inspection settings.
	config := &dlppb.InspectConfig{
		InfoTypes:       builtIn,
		CustomInfoTypes: custom,
		MinLikelihood:   minLikelihood,
		IncludeQuote:    includeQuote,
		Limits: &dlppb.InspectConfig_FindingLimits{
			MaxFindingsPerRequest: maxFindings,
		},
	}

	// The item to analyze carries the raw text.
	item := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_Value{Value: input},
	}

	request := &dlppb.InspectContentRequest{
		Parent:        "projects/" + project,
		InspectConfig: config,
		Item:          item,
	}

	// Send the request and print the result.
	response, err := client.InspectContent(context.Background(), request)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Fprintln(w, response.GetResult())
}

PHP

use Google\Cloud\Dlp\V2\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\InspectConfig\FindingLimits;

/**
 * Inspect a string for sensitive data and print the findings.
 *
 * The request looks for the PERSON_NAME and PHONE_NUMBER infoTypes, uses an
 * unspecified minimum likelihood and always asks for the matching quote.
 *
 * @param string $callingProjectId The GCP Project ID to run the API call under
 * @param string $string The text to inspect
 * @param int $maxFindings (Optional) The maximum number of findings to report per request (0 = server maximum)
 */
function inspect_string(
    $callingProjectId,
    $string,
    $maxFindings = 0
) {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // The infoTypes of information to match
    $personNameInfoType = (new InfoType())
        ->setName('PERSON_NAME');
    $phoneNumberInfoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $infoTypes = [$personNameInfoType, $phoneNumberInfoType];

    // The minimum likelihood required before returning a match.
    // Referenced with the same casing as the imported class name.
    $minLikelihood = Likelihood::LIKELIHOOD_UNSPECIFIED;

    // Whether to include the matching string in the response
    $includeQuote = true;

    // Specify finding limits
    $limits = (new FindingLimits())
        ->setMaxFindingsPerRequest($maxFindings);

    // Create the configuration object
    $inspectConfig = (new InspectConfig())
        ->setMinLikelihood($minLikelihood)
        ->setLimits($limits)
        ->setInfoTypes($infoTypes)
        ->setIncludeQuote($includeQuote);

    // The content item carries the raw text to inspect
    $content = (new ContentItem())
        ->setValue($string);

    $parent = $dlp->projectName($callingProjectId);

    // Run request
    $response = $dlp->inspectContent($parent, [
        'inspectConfig' => $inspectConfig,
        'item' => $content
    ]);

    // Display labels, indexed by the numeric likelihood of a finding
    $likelihoods = ['Unknown', 'Very unlikely', 'Unlikely', 'Possible',
                    'Likely', 'Very likely'];

    // Print the results
    $findings = $response->getResult()->getFindings();
    if (count($findings) == 0) {
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            if ($includeQuote) {
                print('  Quote: ' . $finding->getQuote() . PHP_EOL);
            }
            print('  Info type: ' . $finding->getInfoType()->getName() . PHP_EOL);
            $likelihoodString = $likelihoods[$finding->getLikelihood()];
            print('  Likelihood: ' . $likelihoodString . PHP_EOL);
        }
    }
}

Ruby

# The snippet expects these values to be defined by the caller:
# project_id   = "Your Google Cloud project ID"
# content      = "The text to inspect"
# max_findings = "Maximum number of findings to report per request (0 = server maximum)"

require "google/cloud/dlp"

dlp = Google::Cloud::Dlp.new

# Built-in info types to look for
info_types = [{ name: "PERSON_NAME" }, { name: "US_STATE" }]

# Cap on the number of findings returned for the request
limits = { max_findings_per_request: max_findings }

inspect_config = {
  info_types:     info_types,
  # Likelihood threshold a match must reach to be returned
  min_likelihood: :POSSIBLE,
  limits:         limits,
  # Ask for the matching string to be included in each finding
  include_quote:  true
}

# The item wraps the raw text; the parent is the project resource name
item_to_inspect = { value: content }
parent = "projects/#{project_id}"

# Send the request
response = dlp.inspect_content(
  parent,
  inspect_config: inspect_config,
  item:           item_to_inspect
)

# Print each finding, or a note when there are none
findings = response.result.findings
if findings.empty?
  puts "No findings"
else
  findings.each do |match|
    puts "Quote:      #{match.quote}"
    puts "Info type:  #{match.info_type.name}"
    puts "Likelihood: #{match.likelihood}"
  end
end

C#

/// <summary>
/// Inspects a text value with the Cloud DLP API and writes the findings to the console.
/// </summary>
/// <param name="projectId">ID of the project the request is issued under.</param>
/// <param name="dataValue">Text to inspect.</param>
/// <param name="minLikelihood">Name of a <c>Likelihood</c> enum member used as the minimum likelihood.</param>
/// <param name="maxFindings">Maximum number of findings per request.</param>
/// <param name="includeQuote">Whether to request, and print, the matching quote.</param>
/// <param name="infoTypes">Built-in info types to look for.</param>
/// <param name="customInfoTypes">Custom info types to look for.</param>
/// <returns>Always 0.</returns>
public static object InspectString(
    string projectId,
    string dataValue,
    string minLikelihood,
    int maxFindings,
    bool includeQuote,
    IEnumerable<InfoType> infoTypes,
    IEnumerable<CustomInfoType> customInfoTypes)
{
    // Translate the likelihood name into the enum value the config expects.
    var likelihood = (Likelihood)System.Enum.Parse(typeof(Likelihood), minLikelihood);
    var findingLimits = new InspectConfig.Types.FindingLimits
    {
        MaxFindingsPerRequest = maxFindings
    };

    var config = new InspectConfig
    {
        MinLikelihood = likelihood,
        Limits = findingLimits,
        IncludeQuote = includeQuote
    };
    config.InfoTypes.AddRange(infoTypes);
    config.CustomInfoTypes.AddRange(customInfoTypes);

    var contentItem = new ContentItem { Value = dataValue };
    var inspectRequest = new InspectContentRequest
    {
        ParentAsProjectName = new ProjectName(projectId),
        Item = contentItem,
        InspectConfig = config
    };

    // Send the request.
    DlpServiceClient client = DlpServiceClient.Create();
    InspectContentResponse inspectResponse = client.InspectContent(inspectRequest);

    var results = inspectResponse.Result.Findings;
    if (results.Count == 0)
    {
        Console.WriteLine("No findings.");
        return 0;
    }

    Console.WriteLine("Findings:");
    foreach (var match in results)
    {
        // The quote is only printed when it was requested.
        if (includeQuote)
        {
            Console.WriteLine($"  Quote: {match.Quote}");
        }
        Console.WriteLine($"  InfoType: {match.InfoType}");
        Console.WriteLine($"  Likelihood: {match.Likelihood}");
    }

    return 0;
}

テーブルの検査

以下に、テーブル内の機密データを検査するサンプルコードを示します。テーブルではさまざまな型がサポートされます。

JSON で Cloud DLP API を使用する方法については、JSON クイックスタートをご覧ください。

JSON 入力:

POST https://dlp.googleapis.com/v2/projects/[PROJECT_ID]/content:inspect?key={YOUR_API_KEY}

{
  "item":{
    "table":{
      "headers": [{"name":"column1"}],
      "rows": [{
        "values":[
          {"string_value": "My phone number is (206) 555-0123"}
        ]}
      ]
    }
  },
  "inspectConfig":{
    "infoTypes":[
      {
        "name":"PHONE_NUMBER"
      },
      {
        "name":"US_TOLLFREE_PHONE_NUMBER"
      }
    ],
    "minLikelihood":"POSSIBLE",
    "limits":{
      "maxFindingsPerItem":0
    },
    "includeQuote":true
  }
}

JSON 出力:

{
  "result":{
    "findings":[
      {
        "quote":"(206) 555-0123",
        "infoType":{
          "name":"PHONE_NUMBER"
        },
        "likelihood":"LIKELY",
        "location":{
          "byteRange":{
            "start":"19",
            "end":"33"
          },
          "codepointRange":{
            "start":"19",
            "end":"33"
          },
          "contentLocations":[
            {
              "recordLocation":{
                "fieldId":{
                  "name":"column1"
                },
                "tableLocation":{

                }
              }
            }
          ]
        },
        "createTime":"2018-10-30T00:09:04.569Z"
      }
    ]
  }
}

テキスト ファイルの検査

以下に、テキスト ファイル内の機密データを検査するサンプルコードを示します。

Java

import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.ProjectName;
import com.google.protobuf.ByteString;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;

public class InspectTextFile {

  /**
   * Inspects the contents of a text file with the Cloud DLP API and prints every finding to
   * standard output.
   *
   * <p>The file is read fully into memory, sent as UTF-8 text and scanned for the PHONE_NUMBER,
   * EMAIL_ADDRESS and CREDIT_CARD_NUMBER info types. Any exception (including a failure to read
   * the file) is caught and reported on standard output instead of being propagated.
   *
   * @param projectId ID of the Google Cloud project the request is issued under
   * @param filePath path of the text file to inspect
   */
  public static void inspectTextFile(String projectId, String filePath) {
    // String projectId = "my-project-id";
    // String filePath = "path/to/file.txt";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the project used for request.
      ProjectName project = ProjectName.of(projectId);

      // Read the file contents. ByteString.readFrom does not close the stream it is given, so
      // the stream is opened in its own try-with-resources to avoid leaking the file handle.
      ByteString fileBytes;
      try (FileInputStream fileStream = new FileInputStream(filePath)) {
        fileBytes = ByteString.readFrom(fileStream);
      }

      // Specify the type and content to be inspected.
      ByteContentItem byteItem = ByteContentItem.newBuilder()
          .setType(BytesType.TEXT_UTF8)
          .setData(fileBytes)
          .build();
      ContentItem item = ContentItem.newBuilder()
          .setByteItem(byteItem)
          .build();

      // Specify the type of info the inspection will look for.
      List<InfoType> infoTypes = new ArrayList<>();
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      for (String typeName : new String[] {"PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }

      // Construct the configuration for the Inspect request.
      InspectConfig config = InspectConfig.newBuilder()
          .addAllInfoTypes(infoTypes)
          .setIncludeQuote(true)
          .build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request = InspectContentRequest.newBuilder()
          .setParent(project.toString())
          .setItem(item)
          .setInspectConfig(config)
          .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    } catch (Exception e) {
      System.out.println("Error during inspectFile: \n" + e.toString());
    }
  }
}

Node.js

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Import other required libraries
const fs = require('fs');
const mime = require('mime');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The snippet expects the following values to be defined by the caller.

// The project ID to run the API call under
// const callingProjectId = process.env.GCLOUD_PROJECT;

// The path to a local file to inspect. Can be a text, JPG, or PNG file.
// const filepath = 'path/to/file.txt';

// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';

// The maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;

// The infoTypes of information to match
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];

// The customInfoTypes of information to match
// const customInfoTypes = [{ name: 'DICT_TYPE', dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
//   { name: 'REGEX_TYPE', regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}];

// Whether to include the matching string
// const includeQuote = true;

// Map the file's MIME type to the numeric byte-item type. SVG files are
// reported by `mime` as 'image/svg+xml', so both spellings are listed, and
// plain text is mapped to the text type. Anything else falls back to
// 0 (unspecified).
const bytesTypeByMime = {
  'image/jpeg': 1,
  'image/bmp': 2,
  'image/png': 3,
  'image/svg': 4,
  'image/svg+xml': 4,
  'text/plain': 5,
};
const fileTypeConstant = bytesTypeByMime[mime.getType(filepath)] || 0;

// Construct file data to inspect. readFileSync without an encoding already
// returns a Buffer, which is base64-encoded for the request.
const fileBytes = fs.readFileSync(filepath).toString('base64');
const item = {
  byteItem: {
    type: fileTypeConstant,
    data: fileBytes,
  },
};

// Construct request
const request = {
  parent: dlp.projectPath(callingProjectId),
  inspectConfig: {
    infoTypes: infoTypes,
    customInfoTypes: customInfoTypes,
    minLikelihood: minLikelihood,
    includeQuote: includeQuote,
    limits: {
      maxFindingsPerRequest: maxFindings,
    },
  },
  item: item,
};

// Run request
try {
  const [response] = await dlp.inspectContent(request);
  const findings = response.result.findings;
  if (findings.length > 0) {
    console.log(`Findings:`);
    findings.forEach(finding => {
      // The quote is only printed when it was requested
      if (includeQuote) {
        console.log(`\tQuote: ${finding.quote}`);
      }
      console.log(`\tInfo type: ${finding.infoType.name}`);
      console.log(`\tLikelihood: ${finding.likelihood}`);
    });
  } else {
    console.log(`No findings.`);
  }
} catch (err) {
  console.log(`Error in inspectFile: ${err.message || err}`);
}

Python


def inspect_file(project, filename, info_types, min_likelihood=None,
                 custom_dictionaries=None, custom_regexes=None,
                 max_findings=None, include_quote=True, mime_type=None):
    """Uses the Data Loss Prevention API to analyze a file for protected data.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the file to inspect.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
            If empty or None, 'FIRST_NAME', 'LAST_NAME' and 'EMAIL_ADDRESS'
            are used.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        custom_dictionaries: Optional list of strings; each string is split
            on ',' into the word list of a custom info type named
            'CUSTOM_DICTIONARY_<index>'.
        custom_regexes: Optional list of regex pattern strings; each becomes
            a custom info type named 'CUSTOM_REGEX_<index>'.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        include_quote: Boolean for whether to display a quote of the detected
            information in the results.
        mime_type: The MIME type of the file. If not specified, the type is
            inferred via the Python standard library's mimetypes module.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    import mimetypes

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    if not info_types:
        info_types = ['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS']
    info_types = [{'name': info_type} for info_type in info_types]

    # Prepare custom_info_types by parsing the dictionary word lists and
    # regex patterns.
    if custom_dictionaries is None:
        custom_dictionaries = []
    dictionaries = [{
        'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
        'dictionary': {
            'word_list': {'words': custom_dict.split(',')}
        }
    } for i, custom_dict in enumerate(custom_dictionaries)]
    if custom_regexes is None:
        custom_regexes = []
    regexes = [{
        'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
        'regex': {'pattern': custom_regex}
    } for i, custom_regex in enumerate(custom_regexes)]
    custom_info_types = dictionaries + regexes

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely. include_quote must be forwarded here,
    # otherwise the caller's choice never reaches the API.
    inspect_config = {
        'info_types': info_types,
        'custom_info_types': custom_info_types,
        'min_likelihood': min_likelihood,
        'include_quote': include_quote,
        'limits': {'max_findings_per_request': max_findings},
    }

    # If mime_type is not specified, guess it from the filename.
    if mime_type is None:
        mime_guess = mimetypes.MimeTypes().guess_type(filename)
        mime_type = mime_guess[0]

    # Select the content type index from the list of supported types.
    # mimetypes reports SVG files as 'image/svg+xml', so that spelling is
    # accepted alongside 'image/svg'.
    supported_content_types = {
        None: 0,  # "Unspecified"
        'image/jpeg': 1,
        'image/bmp': 2,
        'image/png': 3,
        'image/svg': 4,
        'image/svg+xml': 4,
        'text/plain': 5,
    }
    content_type_index = supported_content_types.get(mime_type, 0)

    # Construct the item, containing the file's byte data.
    with open(filename, mode='rb') as f:
        item = {'byte_item': {'type': content_type_index, 'data': f.read()}}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    if response.result.findings:
        for finding in response.result.findings:
            # The quote is absent or empty when it was not requested; only
            # print it when there is something to show.
            quote = getattr(finding, 'quote', None)
            if quote:
                print('Quote: {}'.format(quote))
            print('Info type: {}'.format(finding.info_type.name))
            print('Likelihood: {}'.format(finding.likelihood))
    else:
        print('No findings.')

Go

// inspectFile reads all of input and inspects the bytes, declared as bytesType,
// for the given built-in infoTypes plus the custom dictionaries and regexes.
// The inspection result is written to w.
//
// Each entry of customDictionaries is a comma-separated word list; entries are
// named CUSTOM_DICTIONARY_<index> and regexes CUSTOM_REGEX_<index>. A read
// error or a failed API call terminates the program through the log package.
func inspectFile(w io.Writer, client *dlp.Client, project string, minLikelihood dlppb.Likelihood, maxFindings int32, includeQuote bool, infoTypes []string, customDictionaries []string, customRegexes []string, bytesType dlppb.ByteContentItem_BytesType, input io.Reader) {
	// Wrap every built-in infoType name in its proto message.
	var builtIn []*dlppb.InfoType
	for _, name := range infoTypes {
		builtIn = append(builtIn, &dlppb.InfoType{Name: name})
	}

	// Custom infoTypes: dictionaries first, then regexes.
	var custom []*dlppb.CustomInfoType
	for n, csv := range customDictionaries {
		wordList := &dlppb.CustomInfoType_Dictionary_WordList{
			Words: strings.Split(csv, ","),
		}
		dictionary := &dlppb.CustomInfoType_Dictionary{
			Source: &dlppb.CustomInfoType_Dictionary_WordList_{WordList: wordList},
		}
		custom = append(custom, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_DICTIONARY_%d", n)},
			Type:     &dlppb.CustomInfoType_Dictionary_{Dictionary: dictionary},
		})
	}
	for n, pattern := range customRegexes {
		regex := &dlppb.CustomInfoType_Regex{Pattern: pattern}
		custom = append(custom, &dlppb.CustomInfoType{
			InfoType: &dlppb.InfoType{Name: fmt.Sprintf("CUSTOM_REGEX_%d", n)},
			Type:     &dlppb.CustomInfoType_Regex_{Regex: regex},
		})
	}

	// Pull the whole input into memory; the request carries it as raw bytes.
	data, err := ioutil.ReadAll(input)
	if err != nil {
		log.Fatalf("error reading file: %v", err)
	}

	// Inspection settings.
	config := &dlppb.InspectConfig{
		InfoTypes:       builtIn,
		CustomInfoTypes: custom,
		MinLikelihood:   minLikelihood,
		IncludeQuote:    includeQuote,
		Limits: &dlppb.InspectConfig_FindingLimits{
			MaxFindingsPerRequest: maxFindings,
		},
	}

	// The item to analyze: the bytes together with their declared type.
	item := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_ByteItem{
			ByteItem: &dlppb.ByteContentItem{Type: bytesType, Data: data},
		},
	}

	request := &dlppb.InspectContentRequest{
		Parent:        "projects/" + project,
		InspectConfig: config,
		Item:          item,
	}

	// Send the request and print the result.
	response, err := client.InspectContent(context.Background(), request)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Fprintln(w, response.GetResult())
}

PHP

use Google\Cloud\Dlp\V2\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\InspectConfig\FindingLimits;
use Google\Cloud\Dlp\V2\ByteContentItem;

/**
 * Inspect a local file for sensitive data and print the findings.
 *
 * The file is sent to the DLP API as a byte item. PERSON_NAME and
 * PHONE_NUMBER info types are requested, and every finding is printed
 * with its quote, info type and likelihood.
 *
 * @param string $callingProjectId The project ID to run the API call under
 * @param string $path The file path to the file to inspect
 * @param int $maxFindings (Optional) The maximum number of findings to report per request (0 = server maximum)
 *
 * @throws \InvalidArgumentException If the file at $path cannot be read
 */
function inspect_file(
    $callingProjectId,
    $path,
    $maxFindings = 0
) {
    // Read the file up front so an unreadable path fails loudly instead of
    // silently sending an empty payload to the API.
    $fileContents = file_get_contents($path);
    if ($fileContents === false) {
        throw new \InvalidArgumentException(sprintf('Unable to read file: %s', $path));
    }

    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // The infoTypes of information to match
    $personNameInfoType = (new InfoType())
        ->setName('PERSON_NAME');
    $phoneNumberInfoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $infoTypes = [$personNameInfoType, $phoneNumberInfoType];

    // The minimum likelihood required before returning a match
    $minLikelihood = Likelihood::LIKELIHOOD_UNSPECIFIED;

    // Whether to include the matching string in the response
    $includeQuote = true;

    // Specify finding limits
    $limits = (new FindingLimits())
        ->setMaxFindingsPerRequest($maxFindings);

    // Create the configuration object
    $inspectConfig = (new InspectConfig())
        ->setMinLikelihood($minLikelihood)
        ->setLimits($limits)
        ->setInfoTypes($infoTypes)
        ->setIncludeQuote($includeQuote);

    // Map the detected MIME type to the numeric bytes-type constant. The
    // numbers are the positions the previous positional lookup table used
    // (1 = JPEG, 2 = BMP, 3 = PNG, 4 = SVG); 0 is the "unspecified" fallback.
    // NOTE(review): presumably these match ByteContentItem\BytesType - confirm.
    $bytesTypeByMime = [
        'image/jpeg' => 1,
        'image/bmp' => 2,
        'image/x-ms-bmp' => 2, // alias reported for BMP by some fileinfo databases
        'image/png' => 3,
        'image/svg' => 4,
        'image/svg+xml' => 4,  // the registered SVG media type
    ];
    $mimeType = mime_content_type($path);
    $typeConstant = 0;
    if (is_string($mimeType) && isset($bytesTypeByMime[$mimeType])) {
        $typeConstant = $bytesTypeByMime[$mimeType];
    }

    // Create the content item objects
    $byteContent = (new ByteContentItem())
        ->setType($typeConstant)
        ->setData($fileContents);

    $content = (new ContentItem())
        ->setByteItem($byteContent);

    $parent = $dlp->projectName($callingProjectId);

    // Run request
    $response = $dlp->inspectContent($parent, [
        'inspectConfig' => $inspectConfig,
        'item' => $content
    ]);

    // Human-readable labels indexed by the numeric likelihood of a finding
    $likelihoods = ['Unknown', 'Very unlikely', 'Unlikely', 'Possible',
                    'Likely', 'Very likely'];

    // Print the results
    $findings = $response->getResult()->getFindings();
    if (count($findings) == 0) {
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            if ($includeQuote) {
                print('  Quote: ' . $finding->getQuote() . PHP_EOL);
            }
            print('  Info type: ' . $finding->getInfoType()->getName() . PHP_EOL);
            print('  Likelihood: ' . $likelihoods[$finding->getLikelihood()] . PHP_EOL);
        }
    }
}

Ruby

# Inspects a local file for sensitive data and prints each finding.
#
# The following variables must be defined before this snippet runs:
# project_id   = "Your Google Cloud project ID"
# filename     = "The file path to the file to inspect"
# max_findings = "Maximum number of findings to report per request (0 = server maximum)"

require "google/cloud/dlp"

dlp = Google::Cloud::Dlp.new
inspect_config = {
  # The types of information to match
  info_types:     [{ name: "PERSON_NAME" }, { name: "PHONE_NUMBER" }],

  # The minimum likelihood a finding must have to be returned
  min_likelihood: :POSSIBLE,

  # Limit the number of findings (0 for no limit)
  limits:         { max_findings_per_request: max_findings },

  # Whether to include the matching string in the response
  include_quote:  true
}

# The item to inspect. File.binread opens the file in binary mode, reads it
# fully and closes it again, so no file handle is left open.
file_data = File.binread filename
item_to_inspect = { byte_item: { type: :BYTES_TYPE_UNSPECIFIED, data: file_data } }

# Run request
parent = "projects/#{project_id}"
response = dlp.inspect_content parent,
                               inspect_config: inspect_config,
                               item:           item_to_inspect

# Print the results
if response.result.findings.empty?
  puts "No findings"
else
  response.result.findings.each do |finding|
    puts "Quote:      #{finding.quote}"
    puts "Info type:  #{finding.info_type.name}"
    puts "Likelihood: #{finding.likelihood}"
  end
end

C#

// Lookup table from a lower-case file extension (leading dot included) to the
// bytes type sent with the file's content. Extensions that are not listed here
// are handled by the caller's fallback value.
private static readonly Dictionary<string, ByteContentItem.Types.BytesType> s_fileTypes =
    new Dictionary<string, ByteContentItem.Types.BytesType>
    {
        [".bmp"] = ByteContentItem.Types.BytesType.ImageBmp,
        [".jpg"] = ByteContentItem.Types.BytesType.ImageJpeg,
        [".jpeg"] = ByteContentItem.Types.BytesType.ImageJpeg,
        [".png"] = ByteContentItem.Types.BytesType.ImagePng,
        [".svg"] = ByteContentItem.Types.BytesType.ImageSvg,
        [".txt"] = ByteContentItem.Types.BytesType.TextUtf8,
    };

/// <summary>
/// Inspects a local file for sensitive data and writes the findings to the console.
/// </summary>
/// <param name="projectId">The project ID to run the API call under.</param>
/// <param name="file">Path of the file to inspect.</param>
/// <param name="minLikelihood">Name of a <c>Likelihood</c> enum member to use as the minimum likelihood.</param>
/// <param name="maxFindings">Maximum number of findings to report per request.</param>
/// <param name="includeQuote">Whether to request and print the matching text of each finding.</param>
/// <param name="infoTypes">The built-in info types to look for.</param>
/// <param name="customInfoTypes">The custom info types to look for.</param>
/// <returns>Always 0.</returns>
public static object InspectFile(
    string projectId,
    string file,
    string minLikelihood,
    int maxFindings,
    bool includeQuote,
    IEnumerable<InfoType> infoTypes,
    IEnumerable<CustomInfoType> customInfoTypes)
{
    // Open the file read-only: the content is only read, and requesting write
    // access would fail needlessly on read-only files. The using block
    // releases the handle before the API call is made.
    ByteString fileData;
    using (var fileStream = new FileStream(file, FileMode.Open, FileAccess.Read))
    {
        fileData = ByteString.FromStream(fileStream);
    }

    var inspectConfig = new InspectConfig
    {
        MinLikelihood = (Likelihood)System.Enum.Parse(typeof(Likelihood), minLikelihood),
        Limits = new FindingLimits
        {
            MaxFindingsPerRequest = maxFindings
        },
        IncludeQuote = includeQuote,
        InfoTypes = { infoTypes },
        CustomInfoTypes = { customInfoTypes }
    };

    // Pick the bytes type from the file extension; unknown extensions fall
    // back to Unspecified. The invariant culture keeps the lookup independent
    // of the current locale.
    var bytesType = s_fileTypes.GetValueOrDefault(
        new FileInfo(file).Extension.ToLowerInvariant(),
        ByteContentItem.Types.BytesType.Unspecified);

    DlpServiceClient dlp = DlpServiceClient.Create();
    InspectContentResponse response = dlp.InspectContent(new InspectContentRequest
    {
        ParentAsProjectName = new ProjectName(projectId),
        Item = new ContentItem
        {
            ByteItem = new ByteContentItem
            {
                Data = fileData,
                Type = bytesType
            }
        },
        InspectConfig = inspectConfig
    });

    var findings = response.Result.Findings;
    if (findings.Count > 0)
    {
        Console.WriteLine("Findings:");
        foreach (var finding in findings)
        {
            if (includeQuote)
            {
                Console.WriteLine($"  Quote: {finding.Quote}");
            }
            Console.WriteLine($"  InfoType: {finding.InfoType}");
            Console.WriteLine($"  Likelihood: {finding.Likelihood}");
        }
    }
    else
    {
        Console.WriteLine("No findings.");
    }

    return 0;
}

このページは役立ちましたか?評価をお願いいたします。

フィードバックを送信...

Cloud Data Loss Prevention