Query a clustered table

Query a table that has a clustering specification.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Go API reference documentation.

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/bigquery"
	"google.golang.org/api/iterator"
)

// queryClusteredTable demonstrates querying a table that has a clustering specification.
func queryClusteredTable(w io.Writer, projectID, datasetID, tableID string) error {
	// projectID := "my-project-id"
	// datasetID := "mydataset"
	// tableID := "mytable"
	ctx := context.Background()
	client, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %v", err)
	}
	defer client.Close()

	q := client.Query(fmt.Sprintf(`
	SELECT
	  COUNT(1) as transactions,
	  SUM(amount) as total_paid,
	  COUNT(DISTINCT destination) as distinct_recipients
    FROM
	  `+"`%s.%s`"+`
	 WHERE
	    timestamp > TIMESTAMP('2015-01-01')
		AND origin = @wallet`, datasetID, tableID))
	q.Parameters = []bigquery.QueryParameter{
		{
			Name:  "wallet",
			Value: "wallet00001866cb7e0f09a890",
		},
	}
	// Run the query and print results when the query job is completed.
	job, err := q.Run(ctx)
	if err != nil {
		return err
	}
	status, err := job.Wait(ctx)
	if err != nil {
		return err
	}
	if err := status.Err(); err != nil {
		return err
	}
	it, err := job.Read(ctx)
	for {
		var row []bigquery.Value
		err := it.Next(&row)
		if err == iterator.Done {
			break
		}
		if err != nil {
			return err
		}
		fmt.Fprintln(w, row)
	}
	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Java API reference documentation.

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;

public class QueryClusteredTable {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    queryClusteredTable(projectId, datasetName, tableName);
  }

  public static void queryClusteredTable(String projectId, String datasetName, String tableName) {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      String sourceTable = "`" + projectId + "." + datasetName + "." + tableName + "`";
      String query =
          "SELECT word, word_count\n"
              + "FROM "
              + sourceTable
              + "\n"
              // Optimize query performance by filtering the clustered columns in sort order
              + "WHERE corpus = 'romeoandjuliet'\n"
              + "AND word_count >= 1";

      QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();

      TableResult results = bigquery.query(queryConfig);

      results
          .iterateAll()
          .forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));

      System.out.println("Query clustered table performed successfully.");
    } catch (BigQueryException | InterruptedException e) {
      System.out.println("Query not performed \n" + e.toString());
    }
  }
}

Python

Before trying this sample, follow the Python setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Python API reference documentation.

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the destination table.
# table_id = "your-project.your_dataset.your_table_name"

sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`"
cluster_fields = ["corpus"]

job_config = bigquery.QueryJobConfig(
    clustering_fields=cluster_fields, destination=table_id
)

# Start the query, passing in the extra configuration.
query_job = client.query(sql, job_config=job_config)  # Make an API request.
query_job.result()  # Wait for the job to complete.

table = client.get_table(table_id)  # Make an API request.
if table.clustering_fields == cluster_fields:
    print(
        "The destination table is written using the cluster_fields configuration."
    )

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser