使用 Hive 分区创建外部表。
代码示例
Go
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Go 设置说明进行操作。如需了解详情,请参阅 BigQuery Go API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为客户端库设置身份验证。
import (
"context"
"fmt"
"cloud.google.com/go/bigquery"
)
// createTableExternalHivePartitioned creates an external table whose data lives in
// parquet files on Cloud Storage, with hive partitioning enabled in automatic mode.
// It returns a wrapped error if the client cannot be constructed or the table
// cannot be created.
func createTableExternalHivePartitioned(projectID, datasetID, tableID string) error {
	// projectID := "my-project-id"
	// datasetID := "mydatasetid"
	// tableID := "mytableid"
	ctx := context.Background()

	bq, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %w", err)
	}
	defer bq.Close()

	// The table definition is assembled in a single literal: an external data
	// configuration over parquet files, plus the hive partitioning options.
	//
	// Example file:
	// gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/dt=2020-11-15/file1.parquet
	//
	// The sample files follow a hive-compatible layout, so partition columns can be
	// derived from the object paths. The "/dt=YYYY-MM-DD/" path component is exposed
	// by autolayout as a column named "dt" of type DATE. For details on how
	// partitions are extracted, see:
	// https://cloud.google.com/bigquery/docs/hive-partitioned-queries-gcs
	meta := &bigquery.TableMetadata{
		Description: "An example table that demonstrates hive partitioning against external parquet files",
		ExternalDataConfig: &bigquery.ExternalDataConfig{
			SourceFormat: bigquery.Parquet,
			SourceURIs:   []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
			AutoDetect:   true,
			HivePartitioningOptions: &bigquery.HivePartitioningOptions{
				Mode:                   bigquery.AutoHivePartitioningMode,
				SourceURIPrefix:        "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/",
				RequirePartitionFilter: true,
			},
		},
	}

	// Issue the create request for the external table.
	err = bq.Dataset(datasetID).Table(tableID).Create(ctx, meta)
	if err != nil {
		return fmt.Errorf("table creation failure: %w", err)
	}
	return nil
}
Java
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Java 设置说明进行操作。如需了解详情,请参阅 BigQuery Java API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为客户端库设置身份验证。
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.HivePartitioningOptions;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
// Sample to create external table using hive partitioning
public class CreateTableExternalHivePartitioned {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
String sourceUri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*";
String sourceUriPrefix =
"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/";
createTableExternalHivePartitioned(datasetName, tableName, sourceUriPrefix, sourceUri);
}
public static void createTableExternalHivePartitioned(
String datasetName, String tableName, String sourceUriPrefix, String sourceUri) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Configuring partitioning options
HivePartitioningOptions hivePartitioningOptions =
HivePartitioningOptions.newBuilder()
.setMode("CUSTOM")
.setRequirePartitionFilter(true)
.setSourceUriPrefix(sourceUriPrefix)
.build();
TableId tableId = TableId.of(datasetName, tableName);
ExternalTableDefinition customTable =
ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet())
.setAutodetect(true)
.setHivePartitioningOptions(hivePartitioningOptions)
.build();
bigquery.create(TableInfo.of(tableId, customTable));
System.out.println("External table created using hivepartitioningoptions");
} catch (BigQueryException e) {
System.out.println("External table was not created" + e.toString());
}
}
}
Python
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Python 设置说明进行操作。如需了解详情,请参阅 BigQuery Python API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。如需了解详情,请参阅为客户端库设置身份验证。
# Creates an external table backed by parquet files, with hive partitioning enabled.

# TODO(developer): Set table_id to the ID of the table to create.
table_id = "your-project.your_dataset.your_table_name"

# TODO(developer): Set source uri.
# Example file:
# gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/dt=2020-11-15/file1.parquet
uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"

# TODO(developer): Set source uri prefix.
source_uri_prefix = (
    "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
)

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# Hive partitioning options.
# The sample files follow a hive-compatible layout, so partition columns can be
# derived from the object paths. The "/dt=YYYY-MM-DD/" path component shown in the
# example file above is exposed by autolayout as a column named "dt" of type DATE.
# For more information on how partitions are extracted, see:
# https://cloud.google.com/bigquery/docs/hive-partitioned-queries-gcs
hive_opts = bigquery.HivePartitioningOptions()
hive_opts.mode = "AUTO"
hive_opts.require_partition_filter = True
hive_opts.source_uri_prefix = source_uri_prefix

# External data source: parquet files, schema autodetected, with the hive
# partitioning options attached.
parquet_config = bigquery.ExternalConfig("PARQUET")
parquet_config.source_uris = [uri]
parquet_config.autodetect = True
parquet_config.hive_partitioning = hive_opts

# Describe the table and attach the external configuration to it.
table = bigquery.Table(table_id)
table.external_data_configuration = parquet_config

table = client.create_table(table)  # Make an API request.
print(
    "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。