Este exemplo demonstra como inspecionar um recurso do Cloud Storage e criar cópias desidentificadas dos arquivos.
Mais informações
Para ver a documentação detalhada que inclui este exemplo de código, consulte:
Exemplo de código
C#
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using System.Linq;
/// <summary>
/// Sample showing how to create a Cloud DLP inspection job over Cloud Storage files
/// that carries a de-identify action.
/// </summary>
public class DeidentifyDataStoredInCloudStorage
{
    /// <summary>
    /// Creates a DLP job that inspects the files at <paramref name="gcsInputPath"/> for
    /// PERSON_NAME and EMAIL_ADDRESS info types and attaches a de-identify action to it.
    /// </summary>
    /// <param name="projectId">Project used as the job parent and as the owner of the BigQuery table.</param>
    /// <param name="gcsInputPath">Cloud Storage URL of the file set to inspect.</param>
    /// <param name="unstructuredDeidentifyTemplatePath">Template assigned to <c>DeidentifyTemplate</c>.</param>
    /// <param name="structuredDeidentifyTemplatePath">Template assigned to <c>StructuredDeidentifyTemplate</c>.</param>
    /// <param name="imageRedactionTemplatePath">Template assigned to <c>ImageRedactTemplate</c>.</param>
    /// <param name="gcsOutputPath">Cloud Storage output location for the de-identify action.</param>
    /// <param name="datasetId">BigQuery dataset of the transformation-details table.</param>
    /// <param name="tableId">BigQuery table of the transformation-details table.</param>
    /// <returns>The <see cref="DlpJob"/> returned by the create-job call.</returns>
    public static DlpJob Deidentify(
        string projectId,
        string gcsInputPath,
        string unstructuredDeidentifyTemplatePath,
        string structuredDeidentifyTemplatePath,
        string imageRedactionTemplatePath,
        string gcsOutputPath,
        string datasetId,
        string tableId)
    {
        // Create the client first, exactly as the job request is the last thing built.
        var client = DlpServiceClient.Create();

        // Input: the Cloud Storage file set to scan.
        var inputFiles = new CloudStorageOptions.Types.FileSet { Url = gcsInputPath };
        var inputStorage = new StorageConfig
        {
            CloudStorageOptions = new CloudStorageOptions { FileSet = inputFiles }
        };

        // Inspection: which info types to look for; quotes are included in findings.
        var inspection = new InspectConfig { IncludeQuote = true };
        inspection.InfoTypes.Add(new InfoType { Name = "PERSON_NAME" });
        inspection.InfoTypes.Add(new InfoType { Name = "EMAIL_ADDRESS" });

        // Templates applied to unstructured files, images and structured files respectively.
        var templates = new TransformationConfig
        {
            DeidentifyTemplate = unstructuredDeidentifyTemplatePath,
            ImageRedactTemplate = imageRedactionTemplatePath,
            StructuredDeidentifyTemplate = structuredDeidentifyTemplatePath,
        };

        // BigQuery table that receives the details of the transformations performed.
        var detailsTable = new BigQueryTable
        {
            ProjectId = projectId,
            DatasetId = datasetId,
            TableId = tableId
        };

        // Action run once the inspection part of the job has completed:
        // de-identified output goes to gcsOutputPath, details go to the BigQuery table.
        var deidentifyAction = new Action
        {
            Deidentify = new Action.Types.Deidentify
            {
                CloudStorageOutput = gcsOutputPath,
                TransformationConfig = templates,
                TransformationDetailsStorageConfig = new TransformationDetailsStorageConfig
                {
                    Table = detailsTable
                }
            }
        };

        // Assemble the job configuration from the pieces above.
        var jobConfig = new InspectJobConfig
        {
            StorageConfig = inputStorage,
            InspectConfig = inspection
        };
        jobConfig.Actions.Add(deidentifyAction);

        // Submit the job under the project's "global" location and hand back the result.
        var request = new CreateDlpJobRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            InspectJob = jobConfig
        };
        return client.CreateDlpJob(request);
    }
}
Go
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
import (
"context"
"fmt"
"io"
dlp "cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// deidentifyCloudStorage creates a DLP inspection job over the Cloud Storage
// location gcsUri with a de-identify action attached, and writes the created
// job's name (or the creation error) to w.
func deidentifyCloudStorage(w io.Writer, projectID, gcsUri, tableId, datasetId, outputDirectory, deidentifyTemplateId, structuredDeidentifyTemplateId, imageRedactTemplateId string) error {
	// Example argument values:
	// projectId := "my-project-id"
	// gcsUri := "gs://" + "your-bucket-name" + "/path/to/your/file.txt"
	// tableId := "your-bigquery-table-id"
	// datasetId := "your-bigquery-dataset-id"
	// outputDirectory := "your-output-directory"
	// deidentifyTemplateId := "your-deidentify-template-id"
	// structuredDeidentifyTemplateId := "your-structured-deidentify-template-id"
	// imageRedactTemplateId := "your-image-redact-template-id"

	ctx := context.Background()

	// The client is created per call here and released when the function returns.
	dlpClient, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}
	defer dlpClient.Close()

	// Input: the Cloud Storage file set to scan.
	input := &dlppb.StorageConfig{
		Type: &dlppb.StorageConfig_CloudStorageOptions{
			CloudStorageOptions: &dlppb.CloudStorageOptions{
				FileSet: &dlppb.CloudStorageOptions_FileSet{
					Url: gcsUri,
				},
			},
		},
	}

	// Inspection settings: the info types to look for, with quotes included.
	// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
	inspection := &dlppb.InspectConfig{
		InfoTypes: []*dlppb.InfoType{
			{Name: "PERSON_NAME"},
			{Name: "EMAIL_ADDRESS"},
		},
		IncludeQuote: true,
	}

	// BigQuery table referenced by the transformation-details storage config.
	detailsStorage := &dlppb.TransformationDetailsStorageConfig{
		Type: &dlppb.TransformationDetailsStorageConfig_Table{
			Table: &dlppb.BigQueryTable{
				ProjectId: projectID,
				DatasetId: datasetId,
				TableId:   tableId,
			},
		},
	}

	// Action executed on completion of the job: de-identify the listed file
	// types with the given templates and write the output to outputDirectory.
	deidentifyAction := &dlppb.Action{
		Action: &dlppb.Action_Deidentify_{
			Deidentify: &dlppb.Action_Deidentify{
				TransformationConfig: &dlppb.TransformationConfig{
					DeidentifyTemplate:           deidentifyTemplateId,
					ImageRedactTemplate:          imageRedactTemplateId,
					StructuredDeidentifyTemplate: structuredDeidentifyTemplateId,
				},
				TransformationDetailsStorageConfig: detailsStorage,
				Output: &dlppb.Action_Deidentify_CloudStorageOutput{
					CloudStorageOutput: outputDirectory,
				},
				FileTypesToTransform: []dlppb.FileType{
					dlppb.FileType_CSV,
					dlppb.FileType_IMAGE,
					dlppb.FileType_TEXT_FILE,
				},
			},
		},
	}

	// Job creation request: inspection job under the project's global location.
	request := &dlppb.CreateDlpJobRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		Job: &dlppb.CreateDlpJobRequest_InspectJob{
			InspectJob: &dlppb.InspectJobConfig{
				StorageConfig: input,
				InspectConfig: inspection,
				Actions:       []*dlppb.Action{deidentifyAction},
			},
		},
	}

	job, err := dlpClient.CreateDlpJob(ctx, request)
	if err != nil {
		// The error is both reported on w and returned to the caller.
		fmt.Fprintf(w, "error after resp: %v", err)
		return err
	}

	fmt.Fprint(w, "Job created successfully: ", job.Name)
	return nil
}
Java
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.Action;
import com.google.privacy.dlp.v2.BigQueryTable;
import com.google.privacy.dlp.v2.CloudStorageOptions;
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
import com.google.privacy.dlp.v2.DlpJob;
import com.google.privacy.dlp.v2.FileType;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeStats;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectDataSourceDetails;
import com.google.privacy.dlp.v2.InspectJobConfig;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.ProjectDeidentifyTemplateName;
import com.google.privacy.dlp.v2.StorageConfig;
import com.google.privacy.dlp.v2.TransformationConfig;
import com.google.privacy.dlp.v2.TransformationDetailsStorageConfig;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Sample that creates a DLP inspection job over a Cloud Storage location with a de-identify
 * action, waits for the job to finish and prints the per-info-type finding counts.
 */
public class DeidentifyCloudStorage {

  // Maximum time, in minutes, to wait for the job before giving up.
  private static final int TIMEOUT_MINUTES = 15;

  // Delay between two job status checks, in milliseconds.
  private static final long POLL_INTERVAL_MILLIS = 30_000L;

  public static void main(String[] args) throws IOException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    // The Google Cloud project id to use as a parent resource.
    String projectId = "your-project-id";
    // Specify the cloud storage directory that you want to inspect.
    String gcsPath = "gs://" + "your-bucket-name" + "/path/to/your/file.txt";
    // Specify the big query dataset id to store the transformation details.
    String datasetId = "your-bigquery-dataset-id";
    // Specify the big query table id to store the transformation details.
    String tableId = "your-bigquery-table-id";
    // Specify the cloud storage directory to store the de-identified files.
    String outputDirectory = "your-output-directory";
    // Specify the de-identify template ID for unstructured files.
    String deidentifyTemplateId = "your-deidentify-template-id";
    // Specify the de-identify template ID for structured files.
    String structuredDeidentifyTemplateId = "your-structured-deidentify-template-id";
    // Specify the de-identify template ID for images.
    String imageRedactTemplateId = "your-image-redact-template-id";
    deidentifyCloudStorage(
        projectId,
        gcsPath,
        tableId,
        datasetId,
        outputDirectory,
        deidentifyTemplateId,
        structuredDeidentifyTemplateId,
        imageRedactTemplateId);
  }

  /**
   * Creates the inspection job with a de-identify action and polls it until it reaches a final
   * state or {@code TIMEOUT_MINUTES} have elapsed, then prints the job state, name and findings.
   *
   * @param projectId project used as job parent, template owner and BigQuery table owner
   * @param gcsPath Cloud Storage URL of the file set to inspect
   * @param tableId BigQuery table id for the transformation details
   * @param datasetId BigQuery dataset id for the transformation details
   * @param outputDirectory Cloud Storage output location of the de-identify action
   * @param deidentifyTemplateId id of the de-identify template
   * @param structuredDeidentifyTemplateId id of the structured de-identify template
   * @param imageRedactTemplateId id of the image redaction template
   * @throws IOException if the client cannot be created
   * @throws InterruptedException if the thread is interrupted while waiting between polls
   */
  public static void deidentifyCloudStorage(
      String projectId,
      String gcsPath,
      String tableId,
      String datasetId,
      String outputDirectory,
      String deidentifyTemplateId,
      String structuredDeidentifyTemplateId,
      String imageRedactTemplateId)
      throws IOException, InterruptedException {
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Set path in Cloud Storage.
      CloudStorageOptions cloudStorageOptions =
          CloudStorageOptions.newBuilder()
              .setFileSet(CloudStorageOptions.FileSet.newBuilder().setUrl(gcsPath))
              .build();

      // Set storage config indicating the type of cloud storage.
      StorageConfig storageConfig =
          StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build();

      // Specify the type of info the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      List<InfoType> infoTypes = new ArrayList<>();
      for (String typeName : new String[] {"PERSON_NAME", "EMAIL_ADDRESS"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }

      InspectConfig inspectConfig =
          InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setIncludeQuote(true).build();

      // Types of files to include for de-identification.
      List<FileType> fileTypesToTransform =
          Arrays.asList(FileType.IMAGE, FileType.CSV, FileType.TEXT_FILE);

      // Specify the big query table to store the transformation details.
      BigQueryTable table =
          BigQueryTable.newBuilder()
              .setProjectId(projectId)
              .setTableId(tableId)
              .setDatasetId(datasetId)
              .build();

      TransformationDetailsStorageConfig transformationDetailsStorageConfig =
          TransformationDetailsStorageConfig.newBuilder().setTable(table).build();

      // Specify the de-identify templates used for the transformation; each id is expanded
      // to a project-scoped template resource name.
      TransformationConfig transformationConfig =
          TransformationConfig.newBuilder()
              .setDeidentifyTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, deidentifyTemplateId).toString())
              .setImageRedactTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, imageRedactTemplateId).toString())
              .setStructuredDeidentifyTemplate(
                  ProjectDeidentifyTemplateName.of(projectId, structuredDeidentifyTemplateId)
                      .toString())
              .build();

      Action.Deidentify deidentify =
          Action.Deidentify.newBuilder()
              .setCloudStorageOutput(outputDirectory)
              .setTransformationConfig(transformationConfig)
              .setTransformationDetailsStorageConfig(transformationDetailsStorageConfig)
              .addAllFileTypesToTransform(fileTypesToTransform)
              .build();

      Action action = Action.newBuilder().setDeidentify(deidentify).build();

      // Configure the long-running job we want the service to perform.
      InspectJobConfig inspectJobConfig =
          InspectJobConfig.newBuilder()
              .setInspectConfig(inspectConfig)
              .setStorageConfig(storageConfig)
              .addActions(action)
              .build();

      // Construct the job creation request to be sent by the client.
      CreateDlpJobRequest createDlpJobRequest =
          CreateDlpJobRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setInspectJob(inspectJobConfig)
              .build();

      // Send the job creation request.
      DlpJob response = dlp.createDlpJob(createDlpJobRequest);

      // Poll until the job reaches a final state. FAILED and CANCELED are final too, so
      // waiting for DONE alone would spin until the timeout on a job that can never finish.
      long startTime = System.currentTimeMillis();
      while (!isFinalState(response.getState())) {
        Thread.sleep(POLL_INTERVAL_MILLIS);

        // Get the updated job status.
        response = dlp.getDlpJob(response.getName());

        // Give up once the timeout has elapsed, unless the latest poll shows the job finished.
        long elapsedTime = System.currentTimeMillis() - startTime;
        if (!isFinalState(response.getState())
            && TimeUnit.MILLISECONDS.toMinutes(elapsedTime) >= TIMEOUT_MINUTES) {
          System.out.printf("Job did not complete within %d minutes.%n", TIMEOUT_MINUTES);
          break;
        }
      }

      // Print the results.
      System.out.println("Job status: " + response.getState());
      System.out.println("Job name: " + response.getName());
      InspectDataSourceDetails.Result result = response.getInspectDetails().getResult();
      System.out.println("Findings: ");
      for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
        System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
        System.out.println("\tCount: " + infoTypeStat.getCount());
      }
    }
  }

  // Returns true when the job state can no longer change (finished, failed or canceled).
  private static boolean isFinalState(DlpJob.JobState state) {
    return state == DlpJob.JobState.DONE
        || state == DlpJob.JobState.FAILED
        || state == DlpJob.JobState.CANCELED;
  }
}
Node.js
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
// Imports the Google Cloud DLP client library.
const DLP = require('@google-cloud/dlp');
// Instantiates the DLP service client used by deidentifyCloudStorage() below.
const dlp = new DLP.DlpServiceClient();
// The names below are read as free variables by deidentifyCloudStorage();
// uncomment each line and set a real value before running the sample.
// The project ID to run the API call under
// const projectId = 'my-project';
// The Cloud Storage directory that needs to be inspected
// const inputDirectory = 'your-google-cloud-storage-path';
// The ID of the BigQuery dataset holding the transformation-details table, e.g. 'my_dataset'
// const datasetId = 'my_dataset';
// The ID of the BigQuery table that stores the transformation details, e.g. 'my_table'
// const tableId = 'my_table';
// The Cloud Storage directory that will be used to store the de-identified files
// const outputDirectory = 'your-output-directory';
// The full resource name of the default de-identify template
// const deidentifyTemplateId = 'your-deidentify-template-id';
// The full resource name of the de-identify template for structured files
// const structuredDeidentifyTemplateId = 'your-structured-deidentify-template-id';
// The full resource name of the image redaction template for images
// const imageRedactTemplateId = 'your-image-redact-template-id';
/**
 * Creates a DLP inspection job over `inputDirectory` with a de-identify action,
 * polls the job every 30 seconds for up to 15 minutes, and logs the number of
 * findings per info type once the job is done.
 *
 * Reads projectId, inputDirectory, datasetId, tableId, outputDirectory,
 * deidentifyTemplateId, structuredDeidentifyTemplateId and imageRedactTemplateId
 * from the enclosing scope.
 */
async function deidentifyCloudStorage() {
  // Specify storage configuration that uses file set.
  const storageConfig = {
    cloudStorageOptions: {
      fileSet: {
        url: inputDirectory,
      },
    },
  };

  // Specify the type of info the inspection will look for.
  const infoTypes = [{name: 'PERSON_NAME'}, {name: 'EMAIL_ADDRESS'}];

  // Construct inspect configuration
  const inspectConfig = {
    infoTypes: infoTypes,
    includeQuote: true,
  };

  // Types of files to include for de-identification. The API field is a
  // repeated FileType enum, so plain enum names are used (not wrapper objects).
  const fileTypesToTransform = ['IMAGE', 'CSV', 'TEXT_FILE'];

  // Specify the big query table to store the transformation details.
  const transformationDetailsStorageConfig = {
    table: {
      projectId: projectId,
      tableId: tableId,
      datasetId: datasetId,
    },
  };

  // Specify the de-identify template used for the transformation.
  const transformationConfig = {
    deidentifyTemplate: deidentifyTemplateId,
    structuredDeidentifyTemplate: structuredDeidentifyTemplateId,
    imageRedactTemplate: imageRedactTemplateId,
  };

  // Construct action to de-identify sensitive data. The key must be
  // `fileTypesToTransform`; any other key is not part of the Deidentify
  // message and the file-type filter would not reach the service.
  const action = {
    deidentify: {
      cloudStorageOutput: outputDirectory,
      transformationConfig: transformationConfig,
      transformationDetailsStorageConfig: transformationDetailsStorageConfig,
      fileTypesToTransform: fileTypesToTransform,
    },
  };

  // Construct the inspect job configuration.
  const inspectJobConfig = {
    inspectConfig: inspectConfig,
    storageConfig: storageConfig,
    actions: [action],
  };

  // Construct the job creation request to be sent by the client.
  const createDlpJobRequest = {
    parent: `projects/${projectId}/locations/global`,
    inspectJob: inspectJobConfig,
  };

  // Send the job creation request and process the response.
  const [response] = await dlp.createDlpJob(createDlpJobRequest);
  const jobName = response.name;

  // Waiting for a maximum of 15 minutes (30 polls, 30 seconds apart) for the job to complete.
  let job;
  let numOfAttempts = 30;
  while (numOfAttempts > 0) {
    // Fetch DLP Job status
    [job] = await dlp.getDlpJob({name: jobName});

    // Check if the job has completed.
    if (job.state === 'DONE') {
      break;
    }
    if (job.state === 'FAILED') {
      console.log('Job Failed, Please check the configuration.');
      return;
    }

    // Sleep for a short duration before checking the job status again.
    await new Promise(resolve => {
      setTimeout(() => resolve(), 30000);
    });
    numOfAttempts -= 1;
  }

  // Do not report findings for a job that never reached DONE; its results
  // would be incomplete and "No findings." would be misleading.
  if (job.state !== 'DONE') {
    console.log('Job did not complete within 15 minutes.');
    return;
  }

  // Print out the results.
  const infoTypeStats = job.inspectDetails.result.infoTypeStats;
  if (infoTypeStats.length > 0) {
    infoTypeStats.forEach(infoTypeStat => {
      console.log(
        `  Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.`
      );
    });
  } else {
    console.log('No findings.');
  }
}
await deidentifyCloudStorage();
PHP
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
use Google\Cloud\Dlp\V2\Action;
use Google\Cloud\Dlp\V2\Action\Deidentify;
use Google\Cloud\Dlp\V2\BigQueryTable;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\CloudStorageOptions;
use Google\Cloud\Dlp\V2\CloudStorageOptions\FileSet;
use Google\Cloud\Dlp\V2\CreateDlpJobRequest;
use Google\Cloud\Dlp\V2\DlpJob\JobState;
use Google\Cloud\Dlp\V2\FileType;
use Google\Cloud\Dlp\V2\GetDlpJobRequest;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectJobConfig;
use Google\Cloud\Dlp\V2\StorageConfig;
use Google\Cloud\Dlp\V2\TransformationConfig;
use Google\Cloud\Dlp\V2\TransformationDetailsStorageConfig;
/**
 * De-identify sensitive data stored in Cloud Storage using the API.
 * Create an inspection job that has a de-identification action, wait for it
 * (up to 10 polls, 30 seconds apart) and print the outcome.
 *
 * @param string $callingProjectId                  The project ID to run the API call under.
 * @param string $inputgcsPath                      The Cloud Storage directory that you want to de-identify.
 * @param string $outgcsPath                        The Cloud Storage directory where you want to store the
 *                                                  de-identified files.
 * @param string $deidentifyTemplateName            The ID of the default de-identify template. It is passed,
 *                                                  together with $callingProjectId, to
 *                                                  DlpServiceClient::projectDeidentifyTemplateName() to build
 *                                                  the template resource name.
 * @param string $structuredDeidentifyTemplateName  The ID of the de-identify template for structured files;
 *                                                  expanded to a resource name the same way.
 * @param string $imageRedactTemplateName           The ID of the image redaction template for images;
 *                                                  expanded to a resource name the same way.
 * @param string $datasetId                         The ID of the BigQuery dataset where you want to store
 *                                                  the transformation details. If you don't provide a table ID, the
 *                                                  system automatically creates one.
 * @param string $tableId                           The ID of the BigQuery table where you want to store the
 *                                                  transformation details.
 */
function deidentify_cloud_storage(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $inputgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $outgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $deidentifyTemplateName = 'YOUR_DEIDENTIFY_TEMPLATE_NAME',
    string $structuredDeidentifyTemplateName = 'YOUR_STRUCTURED_DEIDENTIFY_TEMPLATE_NAME',
    string $imageRedactTemplateName = 'YOUR_IMAGE_REDACT_DEIDENTIFY_TEMPLATE_NAME',
    string $datasetId = 'YOUR_DATASET_ID',
    string $tableId = 'YOUR_TABLE_ID'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Specify the GCS Path to be de-identify.
    $cloudStorageOptions = (new CloudStorageOptions())
        ->setFileSet((new FileSet())
            ->setUrl($inputgcsPath));
    $storageConfig = (new StorageConfig())
        ->setCloudStorageOptions(($cloudStorageOptions));

    // Specify the type of info the inspection will look for.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes([
            (new InfoType())->setName('PERSON_NAME'),
            (new InfoType())->setName('EMAIL_ADDRESS')
        ]);

    // Specify the big query table to store the transformation details.
    $transformationDetailsStorageConfig = (new TransformationDetailsStorageConfig())
        ->setTable((new BigQueryTable())
            ->setProjectId($callingProjectId)
            ->setDatasetId($datasetId)
            ->setTableId($tableId));

    // Specify the de-identify template used for the transformation.
    $transformationConfig = (new TransformationConfig())
        ->setDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $deidentifyTemplateName)
        )
        ->setStructuredDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $structuredDeidentifyTemplateName)
        )
        ->setImageRedactTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $imageRedactTemplateName)
        );

    $deidentify = (new Deidentify())
        ->setCloudStorageOutput($outgcsPath)
        ->setTransformationConfig($transformationConfig)
        ->setTransformationDetailsStorageConfig($transformationDetailsStorageConfig)
        ->setFileTypesToTransform([FileType::TEXT_FILE, FileType::IMAGE, FileType::CSV]);

    $action = (new Action())
        ->setDeidentify($deidentify);

    // Configure the inspection job we want the service to perform.
    $inspectJobConfig = (new InspectJobConfig())
        ->setInspectConfig($inspectConfig)
        ->setStorageConfig($storageConfig)
        ->setActions([$action]);

    // Send the job creation request and process the response.
    $createDlpJobRequest = (new CreateDlpJobRequest())
        ->setParent($parent)
        ->setInspectJob($inspectJobConfig);
    $job = $dlp->createDlpJob($createDlpJobRequest);

    // States after which the job will not change any more. Polling stops on any
    // of them; waiting for DONE alone keeps sleeping on a failed or canceled job.
    $finalStates = [JobState::DONE, JobState::FAILED, JobState::CANCELED];

    $numOfAttempts = 10;
    do {
        printf('Waiting for job to complete' . PHP_EOL);
        sleep(30);
        $getDlpJobRequest = (new GetDlpJobRequest())
            ->setName($job->getName());
        $job = $dlp->getDlpJob($getDlpJobRequest);
        if (in_array($job->getState(), $finalStates, true)) {
            break;
        }
        $numOfAttempts--;
    } while ($numOfAttempts > 0);

    // Print finding counts.
    printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
    switch ($job->getState()) {
        case JobState::DONE:
            $infoTypeStats = $job->getInspectDetails()->getResult()->getInfoTypeStats();
            if (count($infoTypeStats) === 0) {
                printf('No findings.' . PHP_EOL);
            } else {
                foreach ($infoTypeStats as $infoTypeStat) {
                    printf(
                        '  Found %s instance(s) of infoType %s' . PHP_EOL,
                        $infoTypeStat->getCount(),
                        $infoTypeStat->getInfoType()->getName()
                    );
                }
            }
            break;
        case JobState::FAILED:
            printf('Job %s had errors:' . PHP_EOL, $job->getName());
            $errors = $job->getErrors();
            foreach ($errors as $error) {
                var_dump($error->getDetails());
            }
            break;
        case JobState::CANCELED:
            printf('Job %s was canceled.' . PHP_EOL, $job->getName());
            break;
        case JobState::PENDING:
            printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
            break;
        default:
            printf('Unexpected job state. Most likely, the job is either running or has not yet started.');
    }
}
Python
Para saber como instalar e usar a biblioteca de cliente para proteção de dados sensíveis, consulte Bibliotecas de cliente de proteção de dados sensíveis.
Para usar a proteção de dados sensíveis, configure o Application Default Credentials. Para mais informações, consulte Configurar a autenticação para um ambiente de desenvolvimento local.
import time
from typing import List
import google.cloud.dlp
def deidentify_cloud_storage(
    project: str,
    input_gcs_bucket: str,
    output_gcs_bucket: str,
    info_types: List[str],
    deid_template_id: str,
    structured_deid_template_id: str,
    image_redact_template_id: str,
    dataset_id: str,
    table_id: str,
    timeout: int = 300,
) -> None:
    """
    Uses the Data Loss Prevention API to de-identify files in a Google Cloud
    Storage directory.

    Creates an inspection job with a de-identify action, polls the job every
    30 seconds until it is done, has failed, or the timeout budget is used up,
    and prints the outcome.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_gcs_bucket: The name of google cloud storage bucket to inspect.
        output_gcs_bucket: The name of google cloud storage bucket where
            de-identified files would be stored.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        deid_template_id: The name of the de-identify template for
            unstructured and structured files.
        structured_deid_template_id: The name of the de-identify template
            for structured files.
        image_redact_template_id: The name of the image redaction template
            for images.
        dataset_id: The identifier of the BigQuery dataset where transformation
            details would be stored.
        table_id: The identifier of the BigQuery table where transformation
            details would be stored.
        timeout: The maximum number of seconds to wait for the job to
            complete. The job is polled every 30 seconds, so the value is
            rounded down to a multiple of 30.
    """
    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Specify the type of info the inspection will look for.
    # See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types.
    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}

    # Construct cloud_storage_options dictionary with the bucket's URL.
    storage_config = {
        "cloud_storage_options": {"file_set": {"url": f"gs://{input_gcs_bucket}"}}
    }

    # Specify the big query table to store the transformation details.
    big_query_table = {
        "project_id": project,
        "dataset_id": dataset_id,
        "table_id": table_id,
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Construct Transformation Configuration with de-identify Templates used
    # for transformation.
    transformation_config = {
        "deidentify_template": f"{parent}/deidentifyTemplates/{deid_template_id}",
        "structured_deidentify_template": f"{parent}/deidentifyTemplates/{structured_deid_template_id}",
        "image_redact_template": f"{parent}/deidentifyTemplates/{image_redact_template_id}",
    }

    # De-identify action attached to the job: where the output goes, which
    # templates to apply, where transformation details are stored and which
    # file types are transformed.
    actions = [
        {
            "deidentify": {
                "cloud_storage_output": f"gs://{output_gcs_bucket}",
                "transformation_config": transformation_config,
                "transformation_details_storage_config": {"table": big_query_table},
                "file_types_to_transform": ["IMAGE", "CSV", "TEXT_FILE"],
            }
        }
    ]

    # Construct the job definition.
    inspect_job = {
        "inspect_config": inspect_config,
        "storage_config": storage_config,
        "actions": actions,
    }

    # Call the API.
    response = dlp.create_dlp_job(
        request={
            "parent": parent,
            "inspect_job": inspect_job,
        }
    )
    job_name = response.name
    print(f"Inspection Job started : {job_name}")

    job_states = google.cloud.dlp_v2.DlpJob.JobState

    # Waiting for the job to get completed.
    job = dlp.get_dlp_job(request={"name": job_name})

    # Since the sleep time is kept as 30s, number of calls would be timeout/30.
    # The loop condition uses "> 0" so a negative timeout cannot spin forever.
    no_of_attempts = timeout // 30
    while no_of_attempts > 0 and job.state not in (job_states.DONE, job_states.FAILED):
        # Sleep for a short duration before checking the job status again.
        time.sleep(30)
        no_of_attempts -= 1
        # Get DLP job status.
        job = dlp.get_dlp_job(request={"name": job_name})

    # A failed job is reported as such and never as a timeout.
    if job.state == job_states.FAILED:
        print("Job Failed, Please check the configuration.")
        return

    if job.state != job_states.DONE:
        # timeout is expressed in seconds, and so is the message.
        print(f"Job did not complete within {timeout} seconds.")
        return

    # Print out the results.
    print(f"Job name: {job.name}")
    result = job.inspect_details.result
    print(f"Processed Bytes: {result.processed_bytes}")
    if result.info_type_stats:
        for stats in result.info_type_stats:
            print(f"Info type: {stats.info_type.name}")
            print(f"Count: {stats.count}")
    else:
        print("No findings.")
A seguir
Para pesquisar e filtrar exemplos de código de outros produtos do Google Cloud, consulte o navegador de amostra do Google Cloud.