カスタム infoType 検出器の例

プロトコル

このページのサンプル JSON オブジェクトは、検査と匿名化に使用できる Cloud DLP カスタム infoType 検出器の例です。

さまざまな種類のカスタム infoType 検出器の詳細と作成方法については、「カスタム infoType 検出器の作成」をご覧ください。

customInfoTypes コンテンツの例

次の表に示す JSON は、任意の検査ジョブまたは匿名化ジョブの定義の "customInfoTypes" 属性内に挿入できます。次の完全な JSON リクエストは、短い文字列を、カルテ番号の形式を定義するカスタム infoType 検出器の定義とともに Cloud DLP に送信します。

JSON 入力:

POST https://dlp.googleapis.com/v2/projects/[PROJECT_ID]/content:inspect?key={YOUR_API_KEY}

{
  "item":{
    "value":"Patients MRN 444-5-22222"
  },
  "inspectConfig":{
    "customInfoTypes":[
      {
        "infoType":{
          "name":"C_MRN"
        },
        "regex":{
          "pattern":"[1-9]{3}-[1-9]{1}-[1-9]{5}"
        },
        "likelihood":"POSSIBLE"
      }
    ]
  }
}
記述例 カスタム infoType 検出器
一般的な英数字 ID

{
  "infoType":{
    "name":"GENERIC_ID2"
  },
  "regex":{
    "pattern":"\\b(([0-9]|([a-zA-Z()]{1,2}[0-9]{1}))[0-9-\\[\\]:(), ._]+)[0-9]{1}"
  },
  "likelihood":"POSSIBLE"
}
一般的な通貨

{
  "infoType":{
    "name":"GENERIC_CURRENCY"
  },
  "regex":{
    "pattern":"((\\p{Sc}{1})( ){0,1}[0-9,()]+)(.){0,1}[0-9]{0,}"
  },
  "likelihood":"VERY_LIKELY"
}
一般的な割合

{
  "infoType":{
    "name":"GENERIC_PERCENT"
  },
  "regex":{
    "pattern":"\\b([0-9,.()]+)( ){0,1}(%){1}"
  },
  "likelihood":"VERY_LIKELY"
}
一般的な測定単位

{
  "infoType":{
    "name":"GENERIC_MEASURE"
  },
  "regex":{
    "pattern":"(?i)([0-9])([0-9,.]+)( ){0,1}(?i)(°C|°F|K|°Ré|°N|°Ra|m³|dm³|cm³|l|dl|cl|ml|fl oz|in³|ft³|yd³|gal|bbl|pt|km|m|dm|cm|mm|mi|in|ft|yd|nautical mile|kg|hg|g|dg|cg|mg|µg|mcg|carat|grain|oz|lb|cwt|ton|km²|m²|dm²|cm²|mm²|ha|ca|mile²|in²|yd²|ft²|acre|nautical mile²|kmph|mps|mph|knot|km/h|m/s|mi/h|Hz|KHz|MHz|GHz|atm|bar|mbar|Pa|hPa|Psi|Torr|J|KJ|cal|kcal|Wh|kWh|BTU|thm|ft-lb|degrees|Celsius|Fahrenheit|Kelvin|Reaumur|Newton|Rankine|cubic |liter|deciliter|centiliter|milliliter|fluid ounce|gallon|petroleum barrel|pint|kilometer|meter|decimeter|centimeter|millimeter|mile|inch|foot|yard|nautical mile|tonne|kilogram|hectogram|gram|decigram|centigram|milligram|microgram|carat|grain|ounce|pound|square|hectare|centiare|square mile|square inch|square yard|square foot|acre|square nautical mile|kilometer|meter|mile per hour|knot|Hertz|Kilohertz|Megahertz|Gigahertz|Atmosphère|Bar|Millibar|Pascal|Hectopascal|Torr|Joule|Kilojoule|Calorie|Kilocalorie|Watt-hour|Kilowatt-hour|Foot-Pound|mpg){1}(s){0,1}\\b"
  },
  "likelihood":"VERY_LIKELY"
}
一般的な RFC ID

{
  "infoType":{
    "name":"GENERIC_RFC"
  },
  "regex":{
    "pattern":"\\b(?i)(rfc)( ){0,1}(20|768|783|791|792|793|826|854|855|862|863|864|868|903|937|951|959|1034|1035|1036|1055|1058|1059|1087|1119|1149|1157|1176|1305|1321|1350|1436|1441|1459|1730|1777|1855|1918|1939|1945|1948|1950|1951|1952|1964|2080|2119|2131|2177|2195|2228|2230|2246|2251|2252|2253|2254|2255|2256|2326|2327|2328|2351|2362|2397|2407|2408|2409|2427|2453|2460|2549|2555|2570|2595|2606|2740|2743|2744|2810|2811|2812|2813|2853|2865|2866|2974|3022|3031|3053|3056|3080|3162|3207|3261|3284|3286|3315|3339|3376|3401|3402|3403|3404|3405|3492|3501|3530|3550|3711|3720|3730|3783|3801|3830|3977|4122|4213|4217|4271|4287|4251|4291|4353|4408|4422|4541|4575|4579|4634|4646|4655|4787|4880|4960|5023|5321|5322|5533|5545|5849|5880|5881|5905|5969|6238|6265|6409|6455|6508|6726|6749|6797|6805|7230|7231|7232|7233|7234|7235|7301|7348|7469|7540|7541|7567|7725|7871|8391){0,1}"
  },
  "likelihood":"LIKELY"
}
一般的な RJ45 ネットワーク コネクタ ID

{
  "infoType":{
    "name":"GENERIC_RJ_NETWORK"
  },
  "regex":{
    "pattern":"\\b(?i)(rj)(-){0,1}(?i)(A1X|A2X|A3X|2MB|11|12|13|14|15C|18|21X|25|26X|27X|31X|32X|33X|34X|35X|38X|41S|45S|45|48C|48S|48X|49C|61X|71C)"
  },
  "likelihood":"VERY_LIKELY"
}
一般的なデータサイズ

{
  "infoType":{
    "name":"GENERIC_DATA_SIZE"
  },
  "regex":{
    "pattern":"\\b([0-9,. ]+)(?i)(byte|Kilobyte|KiB|Kilobit|kbit|bit|Megabyte|MiB|Megabit|Mbit|meg|Gigabyte|GiB|Gigabit|Gbit|gig|Terabyte|TiB|Terabit|Tbit|Petabyte|PiB|Petabit|Pbit|Exabyte|EiB|Exabit|Ebit|Zettabyte|ZiB|Zettabit|Zbit|Yottabyte|YiB|Yottabit|Ybit|KB|MB|GB|TB|PB|EB|ZB|YB)(p){0,1}(s){0,1}"
  },
  "likelihood":"VERY_LIKELY"
}
数値 ID

{
  "infoType":{
    "name":"GENERIC_ID1"
  },
  "regex":{
    "pattern":"\\w*[0-9][0-9-()\\[\\].:/]+[0-9]\\w*"
  },
  "likelihood":"POSSIBLE"
}

Java


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.CustomInfoType.Regex;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.Likelihood;
import com.google.privacy.dlp.v2.LocationName;
import com.google.protobuf.ByteString;
import java.io.IOException;

/**
 * Sample showing how to inspect a string with Cloud DLP using a custom infoType detector that is
 * defined by a regular expression.
 */
public class InspectWithCustomRegex {

  /**
   * Runs the sample with placeholder values.
   *
   * @param args unused
   * @throws Exception if the inspection call fails
   */
  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String textToInspect = "Patients MRN 444-5-22222";
    String customRegexPattern = "[1-9]{3}-[1-9]{1}-[1-9]{5}";
    inspectWithCustomRegex(projectId, textToInspect, customRegexPattern);
  }

  /**
   * Inspects a string for matches of a custom regular expression and prints each finding's quote,
   * infoType name and likelihood to standard output.
   *
   * @param projectId the Google Cloud project ID used to build the request's parent resource
   * @param textToInspect the text to send for inspection, encoded as UTF-8
   * @param customRegexPattern the regular expression that defines the "C_MRN" custom infoType
   * @throws IOException if the DLP service client cannot be created
   */
  public static void inspectWithCustomRegex(
      String projectId, String textToInspect, String customRegexPattern) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. The try-with-resources statement closes the
    // client and cleans up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the type and content to be inspected.
      ByteContentItem byteItem =
          ByteContentItem.newBuilder()
              .setType(BytesType.TEXT_UTF8)
              .setData(ByteString.copyFromUtf8(textToInspect))
              .build();
      ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();

      // Specify the regex pattern the inspection will look for.
      Regex regex = Regex.newBuilder().setPattern(customRegexPattern).build();

      // Construct the custom regex detector. The likelihood is set explicitly so that findings
      // are reported as POSSIBLE, matching the equivalent JSON definition of this detector; when
      // it is left unset the service falls back to its default likelihood for custom infoTypes.
      InfoType infoType = InfoType.newBuilder().setName("C_MRN").build();
      CustomInfoType customInfoType =
          CustomInfoType.newBuilder()
              .setInfoType(infoType)
              .setRegex(regex)
              .setLikelihood(Likelihood.POSSIBLE)
              .build();

      // Construct the configuration for the Inspect request. Quotes are included so the matched
      // text can be printed below.
      InspectConfig config =
          InspectConfig.newBuilder()
              .addCustomInfoTypes(customInfoType)
              .setIncludeQuote(true)
              .setMinLikelihood(Likelihood.POSSIBLE)
              .build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request =
          InspectContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setInspectConfig(config)
              .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results.
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding finding : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + finding.getQuote());
        System.out.println("\tInfo type: " + finding.getInfoType().getName());
        System.out.println("\tLikelihood: " + finding.getLikelihood());
      }
    }
  }
}