Clustered table

Load data from a CSV file on Cloud Storage to a clustered table.

Before trying this sample, follow the Go setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Go API reference documentation.

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery"
)


// importClusteredTable demonstrates creating a table from a load job and defining partitioning and clustering
// properties.
func importClusteredTable(projectID, destDatasetID, destTableID string) error {
	// projectID := "my-project-id"
	// datasetID := "mydataset"
	// tableID := "mytable"
	ctx := context.Background()
	client, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %v", err)
	defer client.Close()

	gcsRef := bigquery.NewGCSReference("gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv")
	gcsRef.SkipLeadingRows = 1
	gcsRef.Schema = bigquery.Schema{
		{Name: "timestamp", Type: bigquery.TimestampFieldType},
		{Name: "origin", Type: bigquery.StringFieldType},
		{Name: "destination", Type: bigquery.StringFieldType},
		{Name: "amount", Type: bigquery.NumericFieldType},
	loader := client.Dataset(destDatasetID).Table(destTableID).LoaderFrom(gcsRef)
	loader.TimePartitioning = &bigquery.TimePartitioning{
		Field: "timestamp",
	loader.Clustering = &bigquery.Clustering{
		Fields: []string{"origin", "destination"},
	loader.WriteDisposition = bigquery.WriteEmpty

	job, err := loader.Run(ctx)
	if err != nil {
		return err
	status, err := job.Wait(ctx)
	if err != nil {
		return err

	if status.Err() != nil {
		return fmt.Errorf("job completed with error: %v", status.Err())
	return nil


Before trying this sample, follow the Java setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Java API reference documentation.

import java.util.List;

// Sample to load clustered table.
public class LoadTableClustered {

  public static void main(String[] args) {
    // TODO(developer): Replace these variables before running the sample.
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    String sourceUri = "/path/to/file.csv";
    Schema schema =
            Field.of("name", StandardSQLTypeName.STRING),
            Field.of("post_abbr", StandardSQLTypeName.STRING),
            Field.of("date", StandardSQLTypeName.DATE));
        datasetName, tableName, sourceUri, schema, ImmutableList.of("name", "post_abbr"));

  public static void loadTableClustered(
      String datasetName,
      String tableName,
      String sourceUri,
      Schema schema,
      List<String> clusteringFields) {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableId tableId = TableId.of(datasetName, tableName);

      TimePartitioning partitioning = TimePartitioning.of(TimePartitioning.Type.DAY);
      // Clustering fields will be consisted of fields mentioned in the schema.
      // BigQuery supports clustering for both partitioned and non-partitioned tables.
      Clustering clustering = Clustering.newBuilder().setFields(clusteringFields).build();

      LoadJobConfiguration loadJobConfig =
          LoadJobConfiguration.builder(tableId, sourceUri)

      Job loadJob = bigquery.create(JobInfo.newBuilder(loadJobConfig).build());

      // Load data from a GCS parquet file into the table
      // Blocks until this load table job completes its execution, either failing or succeeding.
      Job job = loadJob.waitFor();

      // Check for errors
      if (job.isDone() && job.getStatus().getError() == null) {
        System.out.println("Data successfully loaded into clustered table during load job");
      } else {
            "BigQuery was unable to load into the table due to an error:"
                + job.getStatus().getError());
    } catch (BigQueryException | InterruptedException e) {
      System.out.println("Data not loaded into clustered table during load job \n" + e.toString());


Before trying this sample, follow the Node.js setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Node.js API reference documentation.

// Import the Google Cloud client library
const {BigQuery} = require('@google-cloud/bigquery');
const {Storage} = require('@google-cloud/storage');

// Instantiate clients
const bigquery = new BigQuery();
const storage = new Storage();

/**
 * This sample loads the CSV file at
 * gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv
 *
 * TODO(developer): Replace the following lines with the path to your file.
 */
const bucketName = 'cloud-samples-data';
const filename = 'bigquery/sample-transactions/transactions.csv';

/**
 * Loads the CSV file identified by `bucketName` and `filename` into a table
 * clustered on the `origin` and `destination` columns, then logs the job id.
 *
 * `datasetId` and `tableId` must be defined (see the TODO below) before this
 * function is called.
 */
async function loadTableClustered() {
  // Loads a new clustered table named "my_table" in "my_dataset".

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const datasetId = "my_dataset";
  // const tableId = "my_table";

  const metadata = {
    sourceFormat: 'CSV',
    // The first row of the file is skipped rather than loaded as data.
    skipLeadingRows: 1,
    schema: {
      fields: [
        {name: 'timestamp', type: 'TIMESTAMP'},
        {name: 'origin', type: 'STRING'},
        {name: 'destination', type: 'STRING'},
        {name: 'amount', type: 'NUMERIC'},
      ],
    },
    clustering: {
      fields: ['origin', 'destination'],
    },
  };

  // Load data from a Google Cloud Storage file into the table
  const [job] = await bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(storage.bucket(bucketName).file(filename), metadata);

  // load() waits for the job to finish
  console.log(`Job ${job.id} completed.`);
}


Before trying this sample, follow the Python setup instructions in the BigQuery quickstart using client libraries. For more information, see the BigQuery Python API reference documentation.

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"

# Describe the load: a CSV file whose first row is skipped, an explicit
# four-column schema, and clustering on the origin/destination columns.
job_config = bigquery.LoadJobConfig(
    skip_leading_rows=1,
    source_format=bigquery.SourceFormat.CSV,
    schema=[
        bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP),
        bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
        bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING),
        bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC),
    ],
    clustering_fields=["origin", "destination"],
)

job = client.load_table_from_uri(
    ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"],
    table_id,
    job_config=job_config,
)

job.result()  # Waits for the job to complete.

table = client.get_table(table_id)  # Make an API request.
print(
    "Loaded {} rows and {} columns to {}".format(
        table.num_rows, len(table.schema), table_id
    )
)

