Memeriksa string untuk mendeteksi data sensitif, tidak termasuk substring kustom

Menggambarkan cara menggunakan InspectConfig untuk menginstruksikan Cloud DLP agar nama "Jimmy" tidak cocok dalam pemindaian yang menggunakan detektor ekspresi reguler kustom yang ditentukan.

Jelajahi lebih lanjut

Untuk dokumentasi mendetail yang menyertakan contoh kode ini, lihat artikel berikut:

Contoh kode

C#

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.


using System;
using System.Collections.Generic;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;

public class InspectStringCustomExcludingSubstring
{
    public static InspectContentResponse Inspect(string projectId, string textToInspect, string customDetectorPattern, List<String> excludedSubstringList)
    {
        var dlp = DlpServiceClient.Create();

        var byteContentItem = new ByteContentItem
        {
            Type = ByteContentItem.Types.BytesType.TextUtf8,
            Data = Google.Protobuf.ByteString.CopyFromUtf8(textToInspect)
        };

        var contentItem = new ContentItem { ByteItem = byteContentItem };

        var infoType = new InfoType
        {
            Name = "CUSTOM_NAME_DETECTOR"
        };
        var customInfoType = new CustomInfoType
        {
            InfoType = infoType,
            Regex = new CustomInfoType.Types.Regex { Pattern = customDetectorPattern }
        };

        var exclusionRule = new ExclusionRule
        {
            MatchingType = MatchingType.PartialMatch,
            Dictionary = new CustomInfoType.Types.Dictionary
            {
                WordList = new CustomInfoType.Types.Dictionary.Types.WordList
                {
                    Words = { excludedSubstringList }
                }
            }
        };

        var ruleSet = new InspectionRuleSet
        {
            InfoTypes = { infoType },
            Rules = { new InspectionRule { ExclusionRule = exclusionRule } }
        };

        var config = new InspectConfig
        {
            CustomInfoTypes = { customInfoType },
            IncludeQuote = true,
            RuleSet = { ruleSet }
        };

        var request = new InspectContentRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            Item = contentItem,
            InspectConfig = config
        };

        var response = dlp.InspectContent(request);

        Console.WriteLine($"Findings: {response.Result.Findings.Count}");
        foreach (var f in response.Result.Findings)
        {
            Console.WriteLine("\tQuote: " + f.Quote);
            Console.WriteLine("\tInfo type: " + f.InfoType.Name);
            Console.WriteLine("\tLikelihood: " + f.Likelihood);
        }

        return response;
    }
}

Go

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.

import (
	"context"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// inspectStringCustomExcludingSubstring inspects a string for sensitive data, excluding a custom substring
func inspectStringCustomExcludingSubstring(w io.Writer, projectID, textToInspect, customDetectorPattern string, excludedSubstringList []string) error {
	// projectId := "your-project-id"
	// textToInspect := "Name: Doe, John. Name: Example, Jimmy"
	// customDetectorPattern := "[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}"
	// excludedSubstringList := []string{"Jimmy"}

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify the type and content to be inspected.
	contentItem := &dlppb.ContentItem{
		DataItem: &dlppb.ContentItem_ByteItem{
			ByteItem: &dlppb.ByteContentItem{
				Type: dlppb.ByteContentItem_TEXT_UTF8,
				Data: []byte(textToInspect),
			},
		},
	}

	// Specify the type of info the inspection will look for.
	infoType := &dlppb.InfoType{
		Name: "CUSTOM_NAME_DETECTOR",
	}

	customInfotype := &dlppb.CustomInfoType{
		InfoType: infoType,
		Type: &dlppb.CustomInfoType_Regex_{
			Regex: &dlppb.CustomInfoType_Regex{
				Pattern: customDetectorPattern,
			},
		},
	}

	// Exclude partial matches from the specified excludedSubstringList.
	exclusionRule := &dlppb.ExclusionRule{
		Type: &dlppb.ExclusionRule_Dictionary{
			Dictionary: &dlppb.CustomInfoType_Dictionary{
				Source: &dlppb.CustomInfoType_Dictionary_WordList_{
					WordList: &dlppb.CustomInfoType_Dictionary_WordList{
						Words: excludedSubstringList,
					},
				},
			},
		},
		MatchingType: dlppb.MatchingType_MATCHING_TYPE_PARTIAL_MATCH,
	}

	// Construct a ruleset that applies the exclusion rule to the EMAIL_ADDRESSES infoType.
	ruleSet := &dlppb.InspectionRuleSet{
		InfoTypes: []*dlppb.InfoType{
			infoType,
		},
		Rules: []*dlppb.InspectionRule{
			{
				Type: &dlppb.InspectionRule_ExclusionRule{
					ExclusionRule: exclusionRule,
				},
			},
		},
	}

	// Construct the Inspect request to be sent by the client.
	req := &dlppb.InspectContentRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		Item:   contentItem,
		InspectConfig: &dlppb.InspectConfig{
			CustomInfoTypes: []*dlppb.CustomInfoType{
				customInfotype,
			},
			IncludeQuote: true,
			RuleSet: []*dlppb.InspectionRuleSet{
				ruleSet,
			},
		},
	}

	// Send the request.
	resp, err := client.InspectContent(ctx, req)
	if err != nil {
		return err
	}

	// Process the results.
	fmt.Fprintf(w, "Findings: %v\n", len(resp.Result.Findings))
	for _, v := range resp.GetResult().Findings {
		fmt.Fprintf(w, "Quote: %v\n", v.GetQuote())
		fmt.Fprintf(w, "Infotype Name: %v\n", v.GetInfoType().GetName())
		fmt.Fprintf(w, "Likelihood: %v\n", v.GetLikelihood())
	}
	return nil

}

Java

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary;
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary.WordList;
import com.google.privacy.dlp.v2.CustomInfoType.Regex;
import com.google.privacy.dlp.v2.ExclusionRule;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.InspectionRule;
import com.google.privacy.dlp.v2.InspectionRuleSet;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.MatchingType;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

public class InspectStringCustomExcludingSubstring {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String textToInspect = "Name: Doe, John. Name: Example, Jimmy";
    String customDetectorPattern = "[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}";
    List<String> excludedSubstringList = Arrays.asList("Jimmy");
    inspectStringCustomExcludingSubstring(
        projectId, textToInspect, customDetectorPattern, excludedSubstringList);
  }

  // Inspects the provided text, avoiding matches specified in the exclusion list.
  public static void inspectStringCustomExcludingSubstring(
      String projectId,
      String textToInspect,
      String customDetectorPattern,
      List<String> excludedSubstringList)
      throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the type and content to be inspected.
      ByteContentItem byteItem =
          ByteContentItem.newBuilder()
              .setType(BytesType.TEXT_UTF8)
              .setData(ByteString.copyFromUtf8(textToInspect))
              .build();
      ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();

      // Specify the type of info the inspection will look for.
      InfoType infoType = InfoType.newBuilder().setName("CUSTOM_NAME_DETECTOR").build();
      CustomInfoType customInfoType =
          CustomInfoType.newBuilder()
              .setInfoType(infoType)
              .setRegex(Regex.newBuilder().setPattern(customDetectorPattern))
              .build();

      // Exclude partial matches from the specified excludedSubstringList.
      ExclusionRule exclusionRule =
          ExclusionRule.newBuilder()
              .setMatchingType(MatchingType.MATCHING_TYPE_PARTIAL_MATCH)
              .setDictionary(
                  Dictionary.newBuilder()
                      .setWordList(WordList.newBuilder().addAllWords(excludedSubstringList)))
              .build();

      // Construct a ruleset that applies the exclusion rule to the EMAIL_ADDRESSES infotype.
      InspectionRuleSet ruleSet =
          InspectionRuleSet.newBuilder()
              .addInfoTypes(infoType)
              .addRules(InspectionRule.newBuilder().setExclusionRule(exclusionRule))
              .build();

      // Construct the configuration for the Inspect request, including the ruleset.
      InspectConfig config =
          InspectConfig.newBuilder()
              .addCustomInfoTypes(customInfoType)
              .setIncludeQuote(true)
              .addRuleSet(ruleSet)
              .build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request =
          InspectContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setInspectConfig(config)
              .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    }
  }
}

Node.js

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// The string to inspect
// const string = 'Name: Doe, John. Name: Example, Jimmy';

// Words to exclude
// const excludedWords = ['Jimmy'];

async function inspectStringCustomExclusionDict() {
  // Specify the content to be inspected.
  const item = {value: string};

  // Specify the type of info the inspection will look for.
  const customInfoTypes = [
    {
      infoType: {name: 'CUSTOM_NAME_DETECTOR'},
      regex: {pattern: '[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}'},
    },
  ];

  // Exclude partial matches from the specified excludedSubstringList.
  const exclusionRUle = {
    dictionary: {wordList: {words: excludedWords}},
    matchingType:
      DLP.protos.google.privacy.dlp.v2.MatchingType
        .MATCHING_TYPE_PARTIAL_MATCH,
  };

  // Construct a rule set that will only match if the match text does not
  // contains tokens from the exclusion list.
  const ruleSet = [
    {
      infoTypes: [{name: 'CUSTOM_NAME_DETECTOR'}],
      rules: [
        {
          exclusionRule: exclusionRUle,
        },
      ],
    },
  ];

  // Construct the configuration for the Inspect request, including the ruleset.
  const inspectConfig = {
    customInfoTypes: customInfoTypes,
    ruleSet: ruleSet,
    includeQuote: true,
  };

  // Construct the Inspect request to be sent by the client.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    inspectConfig: inspectConfig,
    item: item,
  };

  // Use the client to send the API request.
  const [response] = await dlp.inspectContent(request);

  // Print findings.
  const findings = response.result.findings;
  if (findings.length > 0) {
    console.log(`Findings: ${findings.length}\n`);
    findings.forEach(finding => {
      console.log(`InfoType: ${finding.infoType.name}`);
      console.log(`\tQuote: ${finding.quote}`);
      console.log(`\tLikelihood: ${finding.likelihood} \n`);
    });
  } else {
    console.log('No findings.');
  }
}
inspectStringCustomExclusionDict();

PHP

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.

use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CustomInfoType;
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary;
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary\WordList;
use Google\Cloud\Dlp\V2\CustomInfoType\Regex;
use Google\Cloud\Dlp\V2\ExclusionRule;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectContentRequest;
use Google\Cloud\Dlp\V2\InspectionRule;
use Google\Cloud\Dlp\V2\InspectionRuleSet;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\MatchingType;

/**
 * Inspect a string for sensitive data, excluding a custom substring
 * Illustrates how to use an InspectConfig to instruct Cloud DLP to avoid matching on the name "Jimmy" in a scan that uses the specified custom regular expression detector.
 *
 * @param string $projectId         The Google Cloud project id to use as a parent resource.
 * @param string $textToInspect     The string to inspect.
 */
function inspect_string_custom_excluding_substring(
    // TODO(developer): Replace sample parameters before running the code.
    string $projectId,
    string $textToInspect = 'Name: Doe, John. Name: Example, Jimmy'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();
    $customDetectorPattern = '[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}';

    $parent = "projects/$projectId/locations/global";

    // Specify what content you want the service to Inspect.
    $item = (new ContentItem())
        ->setValue($textToInspect);

    // Specify the type of info the inspection will look for.
    $customerNameDetector = (new InfoType())
        ->setName('CUSTOM_NAME_DETECTOR');
    $customInfoType = (new CustomInfoType())
        ->setInfoType($customerNameDetector)
        ->setRegex((new Regex())
            ->setPattern($customDetectorPattern));

    // Exclude partial matches from the specified excludedSubstringList.
    $excludedSubstringList = (new Dictionary())
        ->setWordList((new WordList())
            ->setWords(['Jimmy']));

    $exclusionRule = (new ExclusionRule())
        ->setMatchingType(MatchingType::MATCHING_TYPE_PARTIAL_MATCH)
        ->setDictionary($excludedSubstringList);

    // Construct a ruleset that applies the exclusion rule.
    $inspectionRuleSet = (new InspectionRuleSet())
        ->setInfoTypes([$customerNameDetector])
        ->setRules([
            (new InspectionRule())
                ->setExclusionRule($exclusionRule),
        ]);

    // Construct the configuration for the Inspect request, including the ruleset.
    $inspectConfig = (new InspectConfig())
        ->setCustomInfoTypes([$customInfoType])
        ->setIncludeQuote(true)
        ->setRuleSet([$inspectionRuleSet]);

    // Run request
    $inspectContentRequest = (new InspectContentRequest())
        ->setParent($parent)
        ->setInspectConfig($inspectConfig)
        ->setItem($item);
    $response = $dlp->inspectContent($inspectContentRequest);

    // Print the results
    $findings = $response->getResult()->getFindings();
    if (count($findings) == 0) {
        printf('No findings.' . PHP_EOL);
    } else {
        printf('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            printf('  Quote: %s' . PHP_EOL, $finding->getQuote());
            printf('  Info type: %s' . PHP_EOL, $finding->getInfoType()->getName());
            printf('  Likelihood: %s' . PHP_EOL, Likelihood::name($finding->getLikelihood()));
        }
    }
}

Python

Untuk mempelajari cara menginstal dan menggunakan library klien untuk Perlindungan Data Sensitif, lihat Library klien Perlindungan Data Sensitif.

Untuk mengautentikasi Perlindungan Data Sensitif, siapkan Kredensial Default Aplikasi. Untuk mengetahui informasi selengkapnya, baca Menyiapkan autentikasi untuk lingkungan pengembangan lokal.

from typing import List

import google.cloud.dlp

def inspect_string_custom_excluding_substring(
    project: str, content_string: str, exclusion_list: List[str] = ["jimmy"]
) -> None:
    """Inspects the provided text with a custom detector, avoiding matches on specific tokens

    Uses the Data Loss Prevention API to omit matches on a custom detector
    if they include tokens in the specified exclusion list.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.
        exclusion_list: The list of strings to ignore matches on

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct a custom regex detector for names
    custom_info_types = [
        {
            "info_type": {"name": "CUSTOM_NAME_DETECTOR"},
            "regex": {"pattern": "[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}"},
        }
    ]

    # Construct a rule set that will only match if the match text does not
    # contains tokens from the exclusion list.
    rule_set = [
        {
            "info_types": [{"name": "CUSTOM_NAME_DETECTOR"}],
            "rules": [
                {
                    "exclusion_rule": {
                        "dictionary": {"word_list": {"words": exclusion_list}},
                        "matching_type": google.cloud.dlp_v2.MatchingType.MATCHING_TYPE_PARTIAL_MATCH,
                    }
                }
            ],
        }
    ]

    # Construct the configuration dictionary
    inspect_config = {
        "custom_info_types": custom_info_types,
        "rule_set": rule_set,
        "include_quote": True,
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.inspect_content(
        request={"parent": parent, "inspect_config": inspect_config, "item": item}
    )

    # Print out the results.
    if response.result.findings:
        for finding in response.result.findings:
            print(f"Quote: {finding.quote}")
            print(f"Info type: {finding.info_type.name}")
            print(f"Likelihood: {finding.likelihood}")
    else:
        print("No findings.")

Langkah selanjutnya

Untuk menelusuri dan memfilter contoh kode untuk produk Google Cloud lainnya, lihat browser contoh Google Cloud.