Create an inspection job

Creates an inspection job with the Cloud Data Loss Prevention API.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


using System;
using System.Linq;
using Google.Api.Gax.ResourceNames;
using Google.Cloud.Dlp.V2;
using static Google.Cloud.Dlp.V2.StorageConfig.Types;

public class JobsCreate
{
    public static DlpJob CreateJob(string projectId, string gcsPath)
    {
        var dlp = DlpServiceClient.Create();

        var storageConfig = new StorageConfig
        {
            CloudStorageOptions = new CloudStorageOptions
            {
                FileSet = new CloudStorageOptions.Types.FileSet()
                {
                    Url = gcsPath
                }
            },
            TimespanConfig = new TimespanConfig
            {
                EnableAutoPopulationOfTimespanConfig = true
            }
        };

        var inspectConfig = new InspectConfig
        {
            InfoTypes = { new[] { "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER" }.Select(it => new InfoType() { Name = it }) },
            IncludeQuote = true,
            MinLikelihood = Likelihood.Unlikely,
            Limits = new InspectConfig.Types.FindingLimits() { MaxFindingsPerItem = 100 }
        };

        var response = dlp.CreateDlpJob(new CreateDlpJobRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            InspectJob = new InspectJobConfig
            {
                InspectConfig = inspectConfig,
                StorageConfig = storageConfig,
            }
        });

        Console.WriteLine($"Job: {response.Name} status: {response.State}");

        return response;
    }
}

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import (
	"context"
	"fmt"
	"io"

	dlp "cloud.google.com/go/dlp/apiv2"
	"cloud.google.com/go/dlp/apiv2/dlppb"
)

// createJob creates an inspection job
func createJob(w io.Writer, projectID, gcsPath string, infoTypeNames []string) error {
	// projectID := "my-project-id"
	// gcsPath := "gs://" + "your-bucket-name" + "path/to/file.txt";
	// infoTypeNames := []string{"EMAIL_ADDRESS", "PERSON_NAME", "LOCATION", "PHONE_NUMBER"}

	ctx := context.Background()

	// Initialize a client once and reuse it to send multiple requests. Clients
	// are safe to use across goroutines. When the client is no longer needed,
	// call the Close method to cleanup its resources.
	client, err := dlp.NewClient(ctx)
	if err != nil {
		return err
	}

	// Closing the client safely cleans up background resources.
	defer client.Close()

	// Specify the GCS file to be inspected.
	storageConfig := &dlppb.StorageConfig{
		Type: &dlppb.StorageConfig_CloudStorageOptions{
			CloudStorageOptions: &dlppb.CloudStorageOptions{
				FileSet: &dlppb.CloudStorageOptions_FileSet{
					Url: gcsPath,
				},
			},
		},

		// Set autoPopulateTimespan to true to scan only new content.
		TimespanConfig: &dlppb.StorageConfig_TimespanConfig{
			EnableAutoPopulationOfTimespanConfig: true,
		},
	}

	// Specify the type of info the inspection will look for.
	// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types.
	var infoTypes []*dlppb.InfoType
	for _, c := range infoTypeNames {
		infoTypes = append(infoTypes, &dlppb.InfoType{Name: c})
	}

	inspectConfig := &dlppb.InspectConfig{
		InfoTypes:    infoTypes,
		IncludeQuote: true,

		// The minimum likelihood required before returning a match:
		// See: https://cloud.google.com/dlp/docs/likelihood
		MinLikelihood: dlppb.Likelihood_UNLIKELY,

		// The maximum number of findings to report (0 = server maximum)
		Limits: &dlppb.InspectConfig_FindingLimits{
			MaxFindingsPerItem: 100,
		},
	}

	// Create and send the request.
	req := dlppb.CreateDlpJobRequest{
		Parent: fmt.Sprintf("projects/%s/locations/global", projectID),
		Job: &dlppb.CreateDlpJobRequest_InspectJob{
			InspectJob: &dlppb.InspectJobConfig{
				InspectConfig: inspectConfig,
				StorageConfig: storageConfig,
			},
		},
	}

	// Send the request.
	response, err := client.CreateDlpJob(ctx, &req)
	if err != nil {
		return err
	}

	// Print the results.
	fmt.Fprintf(w, "Created a Dlp Job %v and Status is: %v", response.Name, response.State)
	return nil
}

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.privacy.dlp.v2.Action;
import com.google.privacy.dlp.v2.CloudStorageOptions;
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
import com.google.privacy.dlp.v2.DlpJob;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.InspectJobConfig;
import com.google.privacy.dlp.v2.Likelihood;
import com.google.privacy.dlp.v2.LocationName;
import com.google.privacy.dlp.v2.StorageConfig;
import com.google.privacy.dlp.v2.StorageConfig.TimespanConfig;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class JobsCreate {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String gcsPath = "gs://" + "your-bucket-name" + "path/to/file.txt";
    createJobs(projectId, gcsPath);
  }

  // Creates a DLP Job
  public static void createJobs(String projectId, String gcsPath) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {

      // Set autoPopulateTimespan to true to scan only new content
      boolean autoPopulateTimespan = true;
      TimespanConfig timespanConfig =
          TimespanConfig.newBuilder()
              .setEnableAutoPopulationOfTimespanConfig(autoPopulateTimespan)
              .build();

      // Specify the GCS file to be inspected.
      CloudStorageOptions cloudStorageOptions =
          CloudStorageOptions.newBuilder()
              .setFileSet(CloudStorageOptions.FileSet.newBuilder().setUrl(gcsPath))
              .build();
      StorageConfig storageConfig =
          StorageConfig.newBuilder()
              .setCloudStorageOptions(cloudStorageOptions)
              .setTimespanConfig(timespanConfig)
              .build();

      // Specify the type of info the inspection will look for.
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      List<InfoType> infoTypes =
          Stream.of("EMAIL_ADDRESS", "PERSON_NAME", "LOCATION", "PHONE_NUMBER")
              .map(it -> InfoType.newBuilder().setName(it).build())
              .collect(Collectors.toList());
      // The minimum likelihood required before returning a match:
      // See: https://cloud.google.com/dlp/docs/likelihood
      Likelihood minLikelihood = Likelihood.UNLIKELY;

      // The maximum number of findings to report (0 = server maximum)
      InspectConfig.FindingLimits findingLimits =
          InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerItem(100).build();

      InspectConfig inspectConfig =
          InspectConfig.newBuilder()
              .addAllInfoTypes(infoTypes)
              .setIncludeQuote(true)
              .setMinLikelihood(minLikelihood)
              .setLimits(findingLimits)
              .build();

      // Specify the action that is triggered when the job completes.
      Action.PublishSummaryToCscc publishSummaryToCscc =
          Action.PublishSummaryToCscc.getDefaultInstance();
      Action action = Action.newBuilder().setPublishSummaryToCscc(publishSummaryToCscc).build();

      // Configure the inspection job we want the service to perform.
      InspectJobConfig inspectJobConfig =
          InspectJobConfig.newBuilder()
              .setInspectConfig(inspectConfig)
              .setStorageConfig(storageConfig)
              .addActions(action)
              .build();

      // Construct the job creation request to be sent by the client.
      CreateDlpJobRequest createDlpJobRequest =
          CreateDlpJobRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setInspectJob(inspectJobConfig)
              .build();

      // Send the job creation request and process the response.
      DlpJob createdDlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
      System.out.println("Job created successfully: " + createdDlpJob.getName());
    }
  }
}

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Initialize google DLP Client
const dlp = new DLP.DlpServiceClient();

async function jobsCreate() {
  // Construct cloud storage configuration
  const cloudStorageConfig = {
    cloudStorageOptions: {
      fileSet: {
        url: cloudFileUrl,
      },
    },
    timespanConfig: {
      enableAutoPopulationOfTimespanConfig: true,
    },
  };

  // Construct inspect job configuration
  const inspectJob = {
    storageConfig: cloudStorageConfig,
  };

  // Construct inspect configuration
  const inspectConfig = {
    infoTypes: [
      {name: 'EMAIL_ADDRESS'},
      {name: 'PERSON_NAME'},
      {name: 'LOCATION'},
      {name: 'PHONE_NUMBER'},
    ],
    includeQuote: true,
    minLikelihood: DLP.protos.google.privacy.dlp.v2.Likelihood.LIKELY,
    excludeInfoTypes: false,
  };

  // Combine configurations into a request for the service.
  const request = {
    parent: `projects/${projectId}/locations/global`,
    inspectJob: inspectJob,
    inspectConfig: inspectConfig,
  };

  // Send the request and receive response from the service
  const [response] = await dlp.createDlpJob(request);
  // Print the results
  console.log(`Job created successfully: ${response.name}`);
}

jobsCreate();

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

use Google\Cloud\Dlp\V2\Action;
use Google\Cloud\Dlp\V2\Action\PublishSummaryToCscc;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\CloudStorageOptions;
use Google\Cloud\Dlp\V2\CloudStorageOptions\FileSet;
use Google\Cloud\Dlp\V2\CreateDlpJobRequest;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectConfig\FindingLimits;
use Google\Cloud\Dlp\V2\InspectJobConfig;
use Google\Cloud\Dlp\V2\Likelihood;
use Google\Cloud\Dlp\V2\StorageConfig;
use Google\Cloud\Dlp\V2\StorageConfig\TimespanConfig;

/**
 * Creates an inspection job with the Cloud Data Loss Prevention API.
 *
 * @param string $callingProjectId  The project ID to run the API call under.
 * @param string $gcsPath           GCS file to be inspected. Example : gs://GOOGLE_STORAGE_BUCKET_NAME/dlp_sample.csv
 */
function create_job(
    string $callingProjectId,
    string $gcsPath
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Set autoPopulateTimespan to true to scan only new content.
    $timespanConfig = (new TimespanConfig())
        ->setEnableAutoPopulationOfTimespanConfig(true);

    // Specify the GCS file to be inspected.
    $cloudStorageOptions = (new CloudStorageOptions())
        ->setFileSet((new FileSet())
            ->setUrl($gcsPath));
    $storageConfig = (new StorageConfig())
        ->setCloudStorageOptions(($cloudStorageOptions))
        ->setTimespanConfig($timespanConfig);

    // ----- Construct inspection config -----
    $emailAddressInfoType = (new InfoType())
        ->setName('EMAIL_ADDRESS');
    $personNameInfoType = (new InfoType())
        ->setName('PERSON_NAME');
    $locationInfoType = (new InfoType())
        ->setName('LOCATION');
    $phoneNumberInfoType = (new InfoType())
        ->setName('PHONE_NUMBER');
    $infoTypes = [$emailAddressInfoType, $personNameInfoType, $locationInfoType, $phoneNumberInfoType];

    // Whether to include the matching string in the response.
    $includeQuote = true;
    // The minimum likelihood required before returning a match.
    $minLikelihood = likelihood::LIKELIHOOD_UNSPECIFIED;

    // The maximum number of findings to report (0 = server maximum).
    $limits = (new FindingLimits())
        ->setMaxFindingsPerRequest(100);

    // Create the Inspect configuration object.
    $inspectConfig = (new InspectConfig())
        ->setMinLikelihood($minLikelihood)
        ->setLimits($limits)
        ->setInfoTypes($infoTypes)
        ->setIncludeQuote($includeQuote);

    // Specify the action that is triggered when the job completes.
    $action = (new Action())
        ->setPublishSummaryToCscc(new PublishSummaryToCscc());

    // Configure the inspection job we want the service to perform.
    $inspectJobConfig = (new InspectJobConfig())
        ->setInspectConfig($inspectConfig)
        ->setStorageConfig($storageConfig)
        ->setActions([$action]);

    // Send the job creation request and process the response.
    $parent = "projects/$callingProjectId/locations/global";
    $createDlpJobRequest = (new CreateDlpJobRequest())
        ->setParent($parent)
        ->setInspectJob($inspectJobConfig);
    $job = $dlp->createDlpJob($createDlpJobRequest);

    // Print results.
    printf($job->getName());
}

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.


import google.cloud.dlp


def create_dlp_job(
    project: str,
    bucket: str,
    info_types: list[str],
    job_id: str = None,
    max_findings: int = 100,
    auto_populate_timespan: bool = True,
) -> None:
    """Uses the Data Loss Prevention API to create a DLP job.
    Args:
        project: The project id to use as a parent resource.
        bucket: The name of the GCS bucket to scan. This sample scans all
            files in the bucket.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        job_id: The id of the job. If omitted, an id will be randomly generated.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        auto_populate_timespan: Automatically populates time span config start
            and end times in order to scan new content only.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    info_types = [{"name": info_type} for info_type in info_types]

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        "info_types": info_types,
        "min_likelihood": google.cloud.dlp_v2.Likelihood.UNLIKELY,
        "limits": {"max_findings_per_request": max_findings},
        "include_quote": True,
    }

    # Construct a cloud_storage_options dictionary with the bucket's URL.
    url = f"gs://{bucket}/*"
    storage_config = {
        "cloud_storage_options": {"file_set": {"url": url}},
        # Time-based configuration for each storage object.
        "timespan_config": {
            # Auto-populate start and end times in order to scan new objects
            # only.
            "enable_auto_population_of_timespan_config": auto_populate_timespan
        },
    }

    # Construct the job definition.
    job = {"inspect_config": inspect_config, "storage_config": storage_config}

    # Call the API.
    response = dlp.create_dlp_job(
        request={"parent": parent, "inspect_job": job, "job_id": job_id}
    )

    # Print out the result.
    print(f"Job : {response.name} status: {response.state}")

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.