自定义 infoType 检测器示例

协议

本页中的示例 JSON 对象是 Cloud DLP 自定义 infoType 检测器的几个示例,您可以在检查和去标识化过程中使用。

如需详细了解不同类型的自定义 infoType 检测器以及创建方法,请参阅创建自定义 infoType 检测器

示例 customInfoTypes 内容

下表中提供的 JSON 可以插入到 "customInfoTypes" 特性中的任何检查去标识化作业定义。以下完整的 JSON 请求向 Cloud DLP 发送一个短字符串,其中包含定义医疗记录编号格式的自定义 infoType 检测器定义。

JSON 输入:

POST https://dlp.googleapis.com/v2/projects/[PROJECT_ID]/content:inspect?key={YOUR_API_KEY}

{
  "item":{
    "value":"Patients MRN 444-5-22222"
  },
  "inspectConfig":{
    "customInfoTypes":[
      {
        "infoType":{
          "name":"C_MRN"
        },
        "regex":{
          "pattern":"[1-9]{3}-[1-9]{1}-[1-9]{5}"
        },
        "likelihood":"POSSIBLE"
      }
    ]
  }
}
示例说明 自定义 infoType 检测器
通用字母数字标识符

{
  "infoType":{
    "name":"GENERIC_ID2"
  },
  "regex":{
    "pattern":"\\b(([0-9]|([a-zA-Z()]{1,2}[0-9]{1}))[0-9-\\[\\]:(), ._]+)[0-9]{1}"
  },
  "likelihood":"POSSIBLE"
}
通用货币

{
  "infoType":{
    "name":"GENERIC_CURRENCY"
  },
  "regex":{
    "pattern":"((\\p{Sc}{1})( ){0,1}[0-9,()]+)(.){0,1}[0-9]{0,}"
  },
  "likelihood":"VERY_LIKELY"
}
通用百分比

{
  "infoType":{
    "name":"GENERIC_PERCENT"
  },
  "regex":{
    "pattern":"\\b([0-9,.()]+)( ){0,1}(%){1}"
  },
  "likelihood":"VERY_LIKELY"
}
通用计量单位

{
  "infoType":{
    "name":"GENERIC_MEASURE"
  },
  "regex":{
    "pattern":"(?i)([0-9])([0-9,.]+)( ){0,1}(?i)(°C|°F|K|°Ré|°N|°Ra|m³|dm³|cm³|l|dl|cl|ml|fl oz|in³|ft³|yd³|gal|bbl|pt|km|m|dm|cm|mm|mi|in|ft|yd|nautical mile|kg|hg|g|dg|cg|mg|µg|mcg|carat|grain|oz|lb|cwt|ton|km²|m²|dm²|cm²|mm²|ha|ca|mile²|in²|yd²|ft²|acre|nautical mile²|kmph|mps|mph|knot|km/h|m/s|mi/h|Hz|KHz|MHz|GHz|atm|bar|mbar|Pa|hPa|Psi|Torr|J|KJ|cal|kcal|Wh|kWh|BTU|thm|ft-lb|degrees|Celsius|Fahrenheit|Kelvin|Reaumur|Newton|Rankine|cubic |liter|deciliter|centiliter|milliliter|fluid ounce|gallon|petroleum barrel|pint|kilometer|meter|decimeter|centimeter|millimeter|mile|inch|foot|yard|nautical mile|tonne|kilogram|hectogram|gram|decigram|centigram|milligram|microgram|carat|grain|ounce|pound|square|hectare|centiare|square mile|square inch|square yard|square foot|acre|square nautical mile|kilometer|meter|mile per hour|knot|Hertz|Kilohertz|Megahertz|Gigahertz|Atmosphère|Bar|Millibar|Pascal|Hectopascal|Torr|Joule|Kilojoule|Calorie|Kilocalorie|Watt-hour|Kilowatt-hour|Foot-Pound|mpg){1}(s){0,1}\\b"
  },
  "likelihood":"VERY_LIKELY"
}
通用请求意见稿 (RFC) 标识符

{
  "infoType":{
    "name":"GENERIC_RFC"
  },
  "regex":{
    "pattern":"\\b(?i)(rfc)( ){0,1}(20|768|783|791|792|793|826|854|855|862|863|864|868|903|937|951|959|1034|1035|1036|1055|1058|1059|1087|1119|1149|1157|1176|1305|1321|1350|1436|1441|1459|1730|1777|1855|1918|1939|1945|1948|1950|1951|1952|1964|2080|2119|2131|2177|2195|2228|2230|2246|2251|2252|2253|2254|2255|2256|2326|2327|2328|2351|2362|2397|2407|2408|2409|2427|2453|2460|2549|2555|2570|2595|2606|2740|2743|2744|2810|2811|2812|2813|2853|2865|2866|2974|3022|3031|3053|3056|3080|3162|3207|3261|3284|3286|3315|3339|3376|3401|3402|3403|3404|3405|3492|3501|3530|3550|3711|3720|3730|3783|3801|3830|3977|4122|4213|4217|4271|4287|4251|4291|4353|4408|4422|4541|4575|4579|4634|4646|4655|4787|4880|4960|5023|5321|5322|5533|5545|5849|5880|5881|5905|5969|6238|6265|6409|6455|6508|6726|6749|6797|6805|7230|7231|7232|7233|7234|7235|7301|7348|7469|7540|7541|7567|7725|7871|8391){0,1}"
  },
  "likelihood":"LIKELY"
}
通用 RJ45 网络连接器标识符

{
  "infoType":{
    "name":"GENERIC_RJ_NETWORK"
  },
  "regex":{
    "pattern":"\\b(?i)(rj)(-){0,1}(?i)(A1X|A2X|A3X|2MB|11|12|13|14|15C|18|21X|25|26X|27X|31X|32X|33X|34X|35X|38X|41S|45S|45|48C|48S|48X|49C|61X|71C)"
  },
  "likelihood":"VERY_LIKELY"
}
通用数据大小

{
  "infoType":{
    "name":"GENERIC_DATA_SIZE"
  },
  "regex":{
    "pattern":"\\b([0-9,. ]+)(?i)(byte|Kilobyte|KiB|Kilobit|kbit|bit|Megabyte|MiB|Megabit|Mbit|meg|Gigabyte|GiB|Gigabit|Gbit|gig|Terabyte|TiB|Terabit|Tbit|Petabyte|PiB|Petabit|Pbit|Exabyte|EiB|Exabit|Ebit|Zettabyte|ZiB|Zettabit|Zbit|Yottabyte|YiB|Yottabit|Ybit|KB|MB|GB|TB|PB|EB|ZB|YB)(p){0,1}(s){0,1}"
  },
  "likelihood":"VERY_LIKELY"
}
数字标识符

{
  "infoType":{
    "name":"GENERIC_ID1"
  },
  "regex":{
    "pattern":"\\w*[0-9][0-9-()\\[\\].:/]+[0-9]\\w*"
  },
  "likelihood":"POSSIBLE"
}

Java

如需了解如何安装和使用 Cloud DLP 客户端库,请参阅 Cloud DLP 客户端库


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.ByteContentItem;
import com.google.privacy.dlp.v2.ByteContentItem.BytesType;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.CustomInfoType;
import com.google.privacy.dlp.v2.CustomInfoType.Regex;
import com.google.privacy.dlp.v2.Finding;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectContentRequest;
import com.google.privacy.dlp.v2.InspectContentResponse;
import com.google.privacy.dlp.v2.Likelihood;
import com.google.privacy.dlp.v2.LocationName;
import com.google.protobuf.ByteString;
import java.io.IOException;

public class InspectWithCustomRegex {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String textToInspect = "Patients MRN 444-5-22222";
    String customRegexPattern = "[1-9]{3}-[1-9]{1}-[1-9]{5}";
    inspectWithCustomRegex(projectId, textToInspect, customRegexPattern);
  }

  // Inspects a BigQuery Table
  public static void inspectWithCustomRegex(
      String projectId, String textToInspect, String customRegexPattern) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the type and content to be inspected.
      ByteContentItem byteItem =
          ByteContentItem.newBuilder()
              .setType(BytesType.TEXT_UTF8)
              .setData(ByteString.copyFromUtf8(textToInspect))
              .build();
      ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();

      // Specify the regex pattern the inspection will look for.
      Regex regex = Regex.newBuilder().setPattern(customRegexPattern).build();

      // Construct the custom regex detector.
      InfoType infoType = InfoType.newBuilder().setName("C_MRN").build();
      CustomInfoType customInfoType =
          CustomInfoType.newBuilder().setInfoType(infoType).setRegex(regex).build();

      // Construct the configuration for the Inspect request.
      InspectConfig config =
          InspectConfig.newBuilder()
              .addCustomInfoTypes(customInfoType)
              .setIncludeQuote(true)
              .setMinLikelihood(Likelihood.POSSIBLE)
              .build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request =
          InspectContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setInspectConfig(config)
              .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    }
  }
}