Criar conjunto de arquivos

Crie um conjunto de arquivos em um grupo de entradas.

Páginas de documentação que incluem esta amostra de código

Para visualizar o exemplo de código usado em contexto, consulte a seguinte documentação:

Exemplo de código

Java

Antes de testar essa amostra, siga as instruções de configuração para Java no Guia de início rápido do Data Catalog usando as bibliotecas de cliente. Para mais informações, consulte a documentação de referência da API Data Catalog Java.

import com.google.cloud.datacatalog.v1.ColumnSchema;
import com.google.cloud.datacatalog.v1.CreateEntryRequest;
import com.google.cloud.datacatalog.v1.DataCatalogClient;
import com.google.cloud.datacatalog.v1.Entry;
import com.google.cloud.datacatalog.v1.EntryGroupName;
import com.google.cloud.datacatalog.v1.EntryType;
import com.google.cloud.datacatalog.v1.GcsFilesetSpec;
import com.google.cloud.datacatalog.v1.Schema;
import java.io.IOException;

// Sample to create file set entry
public class CreateFilesetEntry {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "my-project-id";
    String entryGroupId = "fileset_entry_group";
    String entryId = "fileset_entry_id";
    createFilesetEntry(projectId, entryGroupId, entryId);
  }

  // Create Fileset Entry.
  public static void createFilesetEntry(String projectId, String entryGroupId, String entryId)
      throws IOException {
    // Currently, Data Catalog stores metadata in the us-central1 region.
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DataCatalogClient dataCatalogClient = DataCatalogClient.create()) {
      // Construct the Entry for the Entry request.
      Entry entry =
          Entry.newBuilder()
              .setDisplayName("My Fileset")
              .setDescription("This fileset consists of ....")
              .setGcsFilesetSpec(
                  GcsFilesetSpec.newBuilder().addFilePatterns("gs://cloud-samples-data/*").build())
              .setSchema(
                  Schema.newBuilder()
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("first_name")
                              .setDescription("First name")
                              .setMode("REQUIRED")
                              .setType("STRING")
                              .build())
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("last_name")
                              .setDescription("Last name")
                              .setMode("REQUIRED")
                              .setType("STRING")
                              .build())
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("addresses")
                              .setDescription("Addresses")
                              .setMode("REPEATED")
                              .setType("RECORD")
                              .addSubcolumns(
                                  ColumnSchema.newBuilder()
                                      .setColumn("city")
                                      .setDescription("City")
                                      .setMode("NULLABLE")
                                      .setType("STRING")
                                      .build())
                              .addSubcolumns(
                                  ColumnSchema.newBuilder()
                                      .setColumn("state")
                                      .setDescription("State")
                                      .setMode("NULLABLE")
                                      .setType("STRING")
                                      .build())
                              .build())
                      .build())
              .setType(EntryType.FILESET)
              .build();

      // Construct the Entry request to be sent by the client.
      CreateEntryRequest entryRequest =
          CreateEntryRequest.newBuilder()
              .setParent(EntryGroupName.of(projectId, location, entryGroupId).toString())
              .setEntryId(entryId)
              .setEntry(entry)
              .build();

      // Use the client to send the API request.
      Entry entryCreated = dataCatalogClient.createEntry(entryRequest);
      System.out.printf("Entry created with name: %s", entryCreated.getName());
    }
  }
}

Node.js

Antes de testar essa amostra, siga as instruções de configuração para Node.js no Guia de início rápido do Data Catalog usando as bibliotecas de cliente. Para mais informações, consulte a documentação de referência da API Data Catalog Node.js.

// Import the Google Cloud client library.
const {DataCatalogClient} = require('@google-cloud/datacatalog').v1;
const datacatalog = new DataCatalogClient();

async function createFileset() {
  // Create a fileset within an entry group.

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const projectId = 'my_project';
  // const entryGroupId = 'my_entry_group';
  // const entryId = 'my_entry';

  // Currently, Data Catalog stores metadata in the us-central1 region.
  const location = 'us-central1';

  // Delete any pre-existing Entry with the same name that will be used
  // when creating the new Entry.
  try {
    const formattedName = datacatalog.entryPath(
      projectId,
      location,
      entryGroupId,
      entryId
    );
    await datacatalog.deleteEntry({name: formattedName});
  } catch (err) {
    console.log('Entry does not exist.');
  }

  // Delete any pre-existing Entry Group with the same name
  // that will be used to create the new Entry Group.
  try {
    const formattedName = datacatalog.entryGroupPath(
      projectId,
      location,
      entryGroupId
    );
    await datacatalog.deleteEntryGroup({name: formattedName});
  } catch (err) {
    console.log('Entry Group does not exist.');
  }

  // Construct the Entry Group for the Entry Group request.
  const entryGroup = {
    displayName: 'My Fileset Entry Group',
    description: 'This Entry Group consists of ....',
  };

  // Construct the Entry Group request to be sent by the client.
  const entryGroupRequest = {
    parent: datacatalog.locationPath(projectId, location),
    entryGroupId: entryGroupId,
    entryGroup: entryGroup,
  };

  // Use the client to send the API request.
  await datacatalog.createEntryGroup(entryGroupRequest);

  // Construct the Entry for the Entry request.
  const FILESET_TYPE = 4;

  const entry = {
    displayName: 'My Fileset',
    description: 'This fileset consists of ....',
    gcsFilesetSpec: {filePatterns: ['gs://my_bucket/*']},
    schema: {
      columns: [
        {
          column: 'city',
          description: 'City',
          mode: 'NULLABLE',
          type: 'STRING',
        },
        {
          column: 'state',
          description: 'State',
          mode: 'NULLABLE',
          type: 'STRING',
        },
        {
          column: 'addresses',
          description: 'Addresses',
          mode: 'REPEATED',
          subcolumns: [
            {
              column: 'city',
              description: 'City',
              mode: 'NULLABLE',
              type: 'STRING',
            },
            {
              column: 'state',
              description: 'State',
              mode: 'NULLABLE',
              type: 'STRING',
            },
          ],
          type: 'RECORD',
        },
      ],
    },
    type: FILESET_TYPE,
  };

  // Construct the Entry request to be sent by the client.
  const request = {
    parent: datacatalog.entryGroupPath(projectId, location, entryGroupId),
    entryId: entryId,
    entry: entry,
  };

  // Use the client to send the API request.
  const [response] = await datacatalog.createEntry(request);

  console.log(`Name: ${response.name}`);
  console.log(`Display name: ${response.displayName}`);
  console.log(`Type: ${response.type}`);
}
createFileset();

Python

Antes de testar essa amostra, siga as instruções de configuração para Python no Guia de início rápido do Data Catalog usando as bibliotecas de cliente. Para mais informações, consulte a documentação de referência da API Data Catalog para Python (em inglês).

# Import required modules.
from google.cloud import datacatalog_v1

# TODO: Set these values before running the sample.
project_id = "project_id"
fileset_entry_group_id = "entry_group_id"
fileset_entry_id = "entry_id"

# For all regions available, see:
# https://cloud.google.com/data-catalog/docs/concepts/regions
location = "us-central1"

datacatalog = datacatalog_v1.DataCatalogClient()

# Create an Entry Group.
entry_group_obj = datacatalog_v1.types.EntryGroup()
entry_group_obj.display_name = "My Fileset Entry Group"
entry_group_obj.description = "This Entry Group consists of ...."

entry_group = datacatalog.create_entry_group(
    parent=datacatalog_v1.DataCatalogClient.common_location_path(
        project_id, location
    ),
    entry_group_id=fileset_entry_group_id,
    entry_group=entry_group_obj,
)
print(f"Created entry group: {entry_group.name}")

# Create a Fileset Entry.
entry = datacatalog_v1.types.Entry()
entry.display_name = "My Fileset"
entry.description = "This fileset consists of ...."
entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*.csv")
entry.type_ = datacatalog_v1.EntryType.FILESET

# Create the Schema, for example when you have a csv file.
entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="first_name",
        description="First name",
        mode="REQUIRED",
        type_="STRING",
    )
)

entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="last_name", description="Last name", mode="REQUIRED", type_="STRING"
    )
)

# Create the addresses parent column
addresses_column = datacatalog_v1.types.ColumnSchema(
    column="addresses", description="Addresses", mode="REPEATED", type_="RECORD"
)

# Create sub columns for the addresses parent column
addresses_column.subcolumns.append(
    datacatalog_v1.types.ColumnSchema(
        column="city", description="City", mode="NULLABLE", type_="STRING"
    )
)

addresses_column.subcolumns.append(
    datacatalog_v1.types.ColumnSchema(
        column="state", description="State", mode="NULLABLE", type_="STRING"
    )
)

entry.schema.columns.append(addresses_column)

entry = datacatalog.create_entry(
    parent=entry_group.name, entry_id=fileset_entry_id, entry=entry
)
print(f"Created fileset entry: {entry.name}")

A seguir

Para pesquisar e filtrar exemplos de código de outros produtos do Google Cloud, consulte o navegador de exemplos do Google Cloud.