Create external table with hive partitioning

Create an external table over files in Cloud Storage that uses Hive partitioning, so that partition columns are derived from the object paths.

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery"
)

// createTableExternalHivePartitioned demonstrates creating an external table with hive partitioning.
//
// It creates the table tableID in dataset datasetID using a BigQuery client
// bound to projectID. The table is backed by parquet files in a public Cloud
// Storage sample bucket and is configured with automatic hive partition
// detection.
//
// It returns a non-nil error if the client cannot be constructed or if the
// table creation call fails. The underlying error is wrapped, so callers can
// inspect it with errors.Is / errors.As.
func createTableExternalHivePartitioned(projectID, datasetID, tableID string) error {
	// projectID := "my-project-id"
	// datasetID := "mydatasetid"
	// tableID := "mytableid"
	ctx := context.Background()

	client, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %w", err)
	}
	defer client.Close()

	// First, we'll define table metadata to represent a table that's backed by parquet files held in
	// Cloud Storage.
	//
	// Example file:
	// gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/dt=2020-11-15/file1.parquet
	metadata := &bigquery.TableMetadata{
		Description: "An example table that demonstrates hive partitioning against external parquet files",
		ExternalDataConfig: &bigquery.ExternalDataConfig{
			SourceFormat: bigquery.Parquet,
			SourceURIs:   []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
			AutoDetect:   true,
		},
	}

	// The layout of the files in here is compatible with the layout requirements for hive partitioning,
	// so we can add an optional Hive partitioning configuration to leverage the object paths for deriving
	// partitioning column information.
	//
	// For more information on how partitions are extracted, see:
	// https://cloud.google.com/bigquery/docs/hive-partitioned-queries-gcs
	//
	// We have a "/dt=YYYY-MM-DD/" path component in our example files as documented above.  Autolayout will
	// expose this as a column named "dt" of type DATE.
	//
	// SourceURIPrefix is the part of the object path that precedes the partition components; it must be
	// a prefix of the wildcard URI given in SourceURIs above.
	metadata.ExternalDataConfig.HivePartitioningOptions = &bigquery.HivePartitioningOptions{
		Mode:                   bigquery.AutoHivePartitioningMode,
		SourceURIPrefix:        "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/",
		RequirePartitionFilter: true,
	}

	// Create the external table.
	tableRef := client.Dataset(datasetID).Table(tableID)
	if err := tableRef.Create(ctx, metadata); err != nil {
		return fmt.Errorf("table creation failure: %w", err)
	}
	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.HivePartitioningOptions;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;

// Sample to create external table using hive partitioning
public class CreateTableExternalHivePartitioned {

  /**
   * Runs the sample against placeholder dataset/table names and the public "customlayout" sample
   * files.
   */
  public static void main(String[] args) {
    // TODO(developer): Replace these variables before running the sample.
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    String sourceUri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*";
    // With the CUSTOM mode used below, the prefix carries the partition key template
    // ("{pkey:STRING}") after the common path.
    String sourceUriPrefix =
        "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/";
    createTableExternalHivePartitioned(datasetName, tableName, sourceUriPrefix, sourceUri);
  }

  /**
   * Creates an external table backed by parquet files, with hive partitioning in "CUSTOM" mode
   * and a required partition filter.
   *
   * <p>A {@link BigQueryException} is caught and reported on standard output rather than
   * propagated to the caller.
   *
   * @param datasetName name of the dataset in which the table is created
   * @param tableName name of the table to create
   * @param sourceUriPrefix source URI prefix passed to the hive partitioning options
   * @param sourceUri source URI (may contain a wildcard) of the files backing the table
   */
  public static void createTableExternalHivePartitioned(
      String datasetName, String tableName, String sourceUriPrefix, String sourceUri) {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      // Configuring partitioning options
      HivePartitioningOptions hivePartitioningOptions =
          HivePartitioningOptions.newBuilder()
              .setMode("CUSTOM")
              .setRequirePartitionFilter(true)
              .setSourceUriPrefix(sourceUriPrefix)
              .build();

      TableId tableId = TableId.of(datasetName, tableName);
      // Schema is auto-detected from the parquet files; the partitioning options are attached to
      // the external table definition.
      ExternalTableDefinition customTable =
          ExternalTableDefinition.newBuilder(sourceUri, FormatOptions.parquet())
              .setAutodetect(true)
              .setHivePartitioningOptions(hivePartitioningOptions)
              .build();
      bigquery.create(TableInfo.of(tableId, customTable));
      System.out.println("External table created using hivepartitioningoptions");
    } catch (BigQueryException e) {
      // Separate the fixed message from the exception text so the output stays readable.
      System.out.println("External table was not created. \n" + e.toString());
    }
  }
}

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.