Dateisatz erstellen

Mit Sammlungen den Überblick behalten Sie können Inhalte basierend auf Ihren Einstellungen speichern und kategorisieren.

Dateisatz innerhalb einer Eintragsgruppe erstellen.

Dokumentationsseiten mit diesem Codebeispiel

Die folgenden Dokumente enthalten das Codebeispiel im Kontext:

Codebeispiel

Java

Bevor Sie dieses Beispiel anwenden, folgen Sie den Schritten zur Einrichtung von Java in der Data Catalog-Kurzanleitung mit Clientbibliotheken. Weitere Informationen finden Sie in der Referenzdokumentation zur Data Catalog Java API.

import com.google.cloud.datacatalog.v1.ColumnSchema;
import com.google.cloud.datacatalog.v1.CreateEntryRequest;
import com.google.cloud.datacatalog.v1.DataCatalogClient;
import com.google.cloud.datacatalog.v1.Entry;
import com.google.cloud.datacatalog.v1.EntryGroupName;
import com.google.cloud.datacatalog.v1.EntryType;
import com.google.cloud.datacatalog.v1.GcsFilesetSpec;
import com.google.cloud.datacatalog.v1.Schema;
import java.io.IOException;

// Sample to create file set entry
public class CreateFilesetEntry {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "my-project-id";
    String entryGroupId = "fileset_entry_group";
    String entryId = "fileset_entry_id";
    createFilesetEntry(projectId, entryGroupId, entryId);
  }

  // Create Fileset Entry.
  public static void createFilesetEntry(String projectId, String entryGroupId, String entryId)
      throws IOException {
    // Currently, Data Catalog stores metadata in the us-central1 region.
    String location = "us-central1";

    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DataCatalogClient dataCatalogClient = DataCatalogClient.create()) {
      // Construct the Entry for the Entry request.
      Entry entry =
          Entry.newBuilder()
              .setDisplayName("My Fileset")
              .setDescription("This fileset consists of ....")
              .setGcsFilesetSpec(
                  GcsFilesetSpec.newBuilder().addFilePatterns("gs://cloud-samples-data/*").build())
              .setSchema(
                  Schema.newBuilder()
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("first_name")
                              .setDescription("First name")
                              .setMode("REQUIRED")
                              .setType("STRING")
                              .build())
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("last_name")
                              .setDescription("Last name")
                              .setMode("REQUIRED")
                              .setType("STRING")
                              .build())
                      .addColumns(
                          ColumnSchema.newBuilder()
                              .setColumn("addresses")
                              .setDescription("Addresses")
                              .setMode("REPEATED")
                              .setType("RECORD")
                              .addSubcolumns(
                                  ColumnSchema.newBuilder()
                                      .setColumn("city")
                                      .setDescription("City")
                                      .setMode("NULLABLE")
                                      .setType("STRING")
                                      .build())
                              .addSubcolumns(
                                  ColumnSchema.newBuilder()
                                      .setColumn("state")
                                      .setDescription("State")
                                      .setMode("NULLABLE")
                                      .setType("STRING")
                                      .build())
                              .build())
                      .build())
              .setType(EntryType.FILESET)
              .build();

      // Construct the Entry request to be sent by the client.
      CreateEntryRequest entryRequest =
          CreateEntryRequest.newBuilder()
              .setParent(EntryGroupName.of(projectId, location, entryGroupId).toString())
              .setEntryId(entryId)
              .setEntry(entry)
              .build();

      // Use the client to send the API request.
      Entry entryCreated = dataCatalogClient.createEntry(entryRequest);
      System.out.printf("Entry created with name: %s", entryCreated.getName());
    }
  }
}

Node.js

Bevor Sie dieses Beispiel ausprobieren, folgen Sie der Einrichtungsanleitung für Node.js in der Data Catalog-Kurzanleitung mit Clientbibliotheken. Weitere Informationen finden Sie in der Referenzdokumentation zur Data Catalog Node.js API.

// Import the Google Cloud client library.
const {DataCatalogClient} = require('@google-cloud/datacatalog').v1;
const datacatalog = new DataCatalogClient();

async function createFileset() {
  // Create a fileset within an entry group.

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const projectId = 'my_project';
  // const entryGroupId = 'my_entry_group';
  // const entryId = 'my_entry';

  // Currently, Data Catalog stores metadata in the us-central1 region.
  const location = 'us-central1';

  // Delete any pre-existing Entry with the same name that will be used
  // when creating the new Entry.
  try {
    const formattedName = datacatalog.entryPath(
      projectId,
      location,
      entryGroupId,
      entryId
    );
    await datacatalog.deleteEntry({name: formattedName});
  } catch (err) {
    console.log('Entry does not exist.');
  }

  // Delete any pre-existing Entry Group with the same name
  // that will be used to create the new Entry Group.
  try {
    const formattedName = datacatalog.entryGroupPath(
      projectId,
      location,
      entryGroupId
    );
    await datacatalog.deleteEntryGroup({name: formattedName});
  } catch (err) {
    console.log('Entry Group does not exist.');
  }

  // Construct the Entry Group for the Entry Group request.
  const entryGroup = {
    displayName: 'My Fileset Entry Group',
    description: 'This Entry Group consists of ....',
  };

  // Construct the Entry Group request to be sent by the client.
  const entryGroupRequest = {
    parent: datacatalog.locationPath(projectId, location),
    entryGroupId: entryGroupId,
    entryGroup: entryGroup,
  };

  // Use the client to send the API request.
  await datacatalog.createEntryGroup(entryGroupRequest);

  // Construct the Entry for the Entry request.
  const FILESET_TYPE = 4;

  const entry = {
    displayName: 'My Fileset',
    description: 'This fileset consists of ....',
    gcsFilesetSpec: {filePatterns: ['gs://my_bucket/*']},
    schema: {
      columns: [
        {
          column: 'city',
          description: 'City',
          mode: 'NULLABLE',
          type: 'STRING',
        },
        {
          column: 'state',
          description: 'State',
          mode: 'NULLABLE',
          type: 'STRING',
        },
        {
          column: 'addresses',
          description: 'Addresses',
          mode: 'REPEATED',
          subcolumns: [
            {
              column: 'city',
              description: 'City',
              mode: 'NULLABLE',
              type: 'STRING',
            },
            {
              column: 'state',
              description: 'State',
              mode: 'NULLABLE',
              type: 'STRING',
            },
          ],
          type: 'RECORD',
        },
      ],
    },
    type: FILESET_TYPE,
  };

  // Construct the Entry request to be sent by the client.
  const request = {
    parent: datacatalog.entryGroupPath(projectId, location, entryGroupId),
    entryId: entryId,
    entry: entry,
  };

  // Use the client to send the API request.
  const [response] = await datacatalog.createEntry(request);

  console.log(`Name: ${response.name}`);
  console.log(`Display name: ${response.displayName}`);
  console.log(`Type: ${response.type}`);
}
createFileset();

Python

Bevor Sie dieses Beispiel anwenden, folgen Sie den Schritten zur Einrichtung von Python in der Data Catalog-Kurzanleitung mit Clientbibliotheken. Weitere Informationen finden Sie in der Referenzdokumentation zur Data Catalog Python API.

# Import required modules.
from google.cloud import datacatalog_v1

# TODO: Set these values before running the sample.
project_id = "project_id"
fileset_entry_group_id = "entry_group_id"
fileset_entry_id = "entry_id"

# For all regions available, see:
# https://cloud.google.com/data-catalog/docs/concepts/regions
location = "us-central1"

datacatalog = datacatalog_v1.DataCatalogClient()

# Create an Entry Group.
entry_group_obj = datacatalog_v1.types.EntryGroup()
entry_group_obj.display_name = "My Fileset Entry Group"
entry_group_obj.description = "This Entry Group consists of ...."

entry_group = datacatalog.create_entry_group(
    parent=datacatalog_v1.DataCatalogClient.common_location_path(
        project_id, location
    ),
    entry_group_id=fileset_entry_group_id,
    entry_group=entry_group_obj,
)
print(f"Created entry group: {entry_group.name}")

# Create a Fileset Entry.
entry = datacatalog_v1.types.Entry()
entry.display_name = "My Fileset"
entry.description = "This fileset consists of ...."
entry.gcs_fileset_spec.file_patterns.append("gs://my_bucket/*.csv")
entry.type_ = datacatalog_v1.EntryType.FILESET

# Create the Schema, for example when you have a csv file.
entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="first_name",
        description="First name",
        mode="REQUIRED",
        type_="STRING",
    )
)

entry.schema.columns.append(
    datacatalog_v1.types.ColumnSchema(
        column="last_name", description="Last name", mode="REQUIRED", type_="STRING"
    )
)

# Create the addresses parent column
addresses_column = datacatalog_v1.types.ColumnSchema(
    column="addresses", description="Addresses", mode="REPEATED", type_="RECORD"
)

# Create sub columns for the addresses parent column
addresses_column.subcolumns.append(
    datacatalog_v1.types.ColumnSchema(
        column="city", description="City", mode="NULLABLE", type_="STRING"
    )
)

addresses_column.subcolumns.append(
    datacatalog_v1.types.ColumnSchema(
        column="state", description="State", mode="NULLABLE", type_="STRING"
    )
)

entry.schema.columns.append(addresses_column)

entry = datacatalog.create_entry(
    parent=entry_group.name, entry_id=fileset_entry_id, entry=entry
)
print(f"Created fileset entry: {entry.name}")

Nächste Schritte

Informationen zum Suchen und Filtern von Codebeispielen für andere Google Cloud-Produkte finden Sie im Google Cloud-Beispielbrowser.