Managing Tables

This document describes how to manage tables.

Overview

A BigQuery table is a standard, two-dimensional table with individual records organized in rows, and a data type assigned to each column (also called a field). Individual fields within a record may contain nested and repeated child fields.

Every table has a schema that describes field names, types, and other information. You can specify a table's schema in the initial table creation request, or you can create a table without a schema and declare the schema in the query or load job that first populates the table. If you need to change the schema later, you can update it.

For more information about allowed types and data formats, see preparing data for BigQuery.
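
For example, here is a minimal sketch of a schema that contains a nested, repeated field, using the Python client library shown in the samples below (the field names are illustrative):

from google.cloud import bigquery

# A REPEATED RECORD field holds zero or more child records per row.
schema = [
    bigquery.SchemaField('name', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField('age', 'INTEGER'),
    bigquery.SchemaField(
        'addresses', 'RECORD', mode='REPEATED',
        fields=[
            bigquery.SchemaField('city', 'STRING'),
            bigquery.SchemaField('zip', 'STRING'),
        ]),
]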

Listing tables in a dataset

List tables in a dataset by calling bigquery.tables.list. You must have access to the specified dataset to list its tables.

Example

C#

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

var tables = _client.ListTables(datasetId).ToList();

Go

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

ts := client.Dataset(datasetID).Tables(ctx)
for {
	t, err := ts.Next()
	if err == iterator.Done {
		break
	}
	if err != nil {
		return err
	}
	fmt.Fprintf(w, "Table: %q\n", t.TableID)
}

Java

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Page<Table> tables = bigquery.listTables(datasetName, TableListOption.pageSize(100));
Iterator<Table> tableIterator = tables.iterateAll();
while (tableIterator.hasNext()) {
  Table table = tableIterator.next();
  // do something with the table
}

Node.js

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

function listTables (datasetId, projectId) {
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });

  // References an existing dataset, e.g. "my_dataset"
  const dataset = bigquery.dataset(datasetId);

  // Lists all tables in the dataset
  return dataset.getTables()
    .then((results) => {
      const tables = results[0];
      console.log('Tables:');
      tables.forEach((table) => console.log(table.id));
      return tables;
    });
}

PHP

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

use Google\Cloud\BigQuery\BigQueryClient;

/**
 * @param string $projectId The Google project ID.
 * @param string $datasetId The BigQuery dataset ID.
 */
function list_tables($projectId, $datasetId)
{
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $tables = $dataset->tables();
    foreach ($tables as $table) {
        print($table->id() . PHP_EOL);
    }
}

/**
 * @param string $projectId The Google project ID.
 * @param string $datasetId The BigQuery dataset ID.
 * @param string $tableId   The BigQuery table ID.
 */
function get_table($projectId, $datasetId, $tableId)
{
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    return $dataset->table($tableId);
}

Python

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

def list_tables(dataset_name, project=None):
    """Lists all of the tables in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    for table in dataset.list_tables():
        print(table.name)

Ruby

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

# project_id = "Your Google Cloud project ID"
# dataset_id = "ID of the dataset to create table in"

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new project: project_id
dataset  = bigquery.dataset dataset_id

dataset.tables.each do |table|
  puts table.table_id
end

Creating a table

BigQuery offers several ways to create a new table: you can create an empty table with a schema definition, or you can create the table implicitly by populating it with a query or load job.

All tables defined over data in managed BigQuery storage support appending data to the table.

C#

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

public void CreateTable(string datasetId, string tableId, BigqueryClient client)
{
    var dataset = client.GetDataset(datasetId);
    // Create schema for new table.
    var schema = new TableSchemaBuilder
    {
        { "title", BigqueryDbType.String },
        { "unique_words", BigqueryDbType.Integer }
    }.Build();
    // Create the table if it doesn't exist.
    BigqueryTable table = dataset.GetOrCreateTable(tableId, schema);
}

Go

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

// Item represents a row item.
type Item struct {
	Name  string
	Count int
}

// Save implements the ValueSaver interface.
func (i *Item) Save() (map[string]bigquery.Value, string, error) {
	return map[string]bigquery.Value{
		"Name":  i.Name,
		"Count": i.Count,
	}, "", nil
}

schema, err := bigquery.InferSchema(Item{})
if err != nil {
	return err
}
table := client.Dataset(datasetID).Table(tableID)
if err := table.Create(ctx, schema); err != nil {
	return err
}

Java

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

TableId tableId = TableId.of(datasetName, tableName);
// Table field definition
Field field = Field.of(fieldName, Field.Type.string());
// Table schema definition
Schema schema = Schema.of(field);
TableDefinition tableDefinition = StandardTableDefinition.of(schema);
TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();
Table table = bigquery.create(tableInfo);

Node.js

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

function createTable (datasetId, tableId, schema, projectId) {
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });

  // References an existing dataset, e.g. "my_dataset"
  const dataset = bigquery.dataset(datasetId);

  // Specify a schema, e.g. "Name:string, Age:integer, Weight:float, IsMagic:boolean"
  // For all options, see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
  const options = {
    schema: schema
  };

  // Create a new table in the dataset
  return dataset.createTable(tableId, options)
    .then((results) => {
      const table = results[0];
      console.log(`Table ${table.id} created.`);
      return table;
    });
}

PHP

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

use Google\Cloud\BigQuery\BigQueryClient;

/**
 * Example:
 * ```
 * $fields = [
 *     [
 *         'name' => 'field1',
 *         'type' => 'string',
 *         'mode' => 'required'
 *     ],
 *     [
 *         'name' => 'field2',
 *         'type' => 'integer'
 *     ],
 * ];
 * $schema = ['fields' => $fields];
 * create_table($projectId, $datasetId, $tableId, $schema);
 * ```
 * @param string $projectId The Google project ID.
 * @param string $datasetId The BigQuery dataset ID.
 * @param string $tableId   The BigQuery table ID.
 * @param array  $schema    The BigQuery table schema.
 */
function create_table($projectId, $datasetId, $tableId, $schema)
{
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $options = ['schema' => $schema];
    $table = $dataset->createTable($tableId, $options);
    return $table;
}

Python

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

def create_table(dataset_name, table_name, project=None):
    """Creates a simple table in the given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    table = dataset.table(table_name)

    # Set the table schema
    table.schema = (
        bigquery.SchemaField('Name', 'STRING'),
        bigquery.SchemaField('Age', 'INTEGER'),
        bigquery.SchemaField('Weight', 'FLOAT'),
    )

    table.create()

    print('Created table {} in dataset {}.'.format(table_name, dataset_name))

Ruby

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

# project_id = "Your Google Cloud project ID"
# dataset_id = "ID of the dataset to create table in"
# table_id   = "ID of the table to create"

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new project: project_id
dataset  = bigquery.dataset dataset_id

dataset.create_table table_id

puts "Created table: #{table_id}"

Exporting a table

You can export a table in CSV, JSON or Avro format, into a Cloud Storage bucket. For more information, see exporting data from BigQuery.
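
For example, here is a minimal sketch of an export job using the Python client from the samples in this document; the bucket name and object path are placeholders, and the client version is assumed to provide extract_table_to_storage:

import uuid

from google.cloud import bigquery

bigquery_client = bigquery.Client()
table = bigquery_client.dataset('mydataset').table('mytable')

# Export the table as newline-delimited JSON to a Cloud Storage bucket
# that you can write to. CSV is the default format.
job = bigquery_client.extract_table_to_storage(
    str(uuid.uuid4()),  # unique job ID
    table,
    'gs://my-bucket/mytable-export.json')
job.destination_format = 'NEWLINE_DELIMITED_JSON'
job.begin()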

Appending data

You can load additional data into a table either from source files or by appending query results. Note that the schema of the loaded data must match the schema of the existing table, but you can update the schema before appending.

To append data from a source file:

  1. Create additional source files.
  2. Upload the files, using either code or the web UI.
  3. Load them into your existing table by referencing the existing table. In code, set writeDisposition=WRITE_APPEND (see the sketch after these steps).

To append data from a query result:

  1. Run an asynchronous query, pass in the name of your existing table as the destination table, and set writeDisposition=WRITE_APPEND.
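
For example, here is a minimal sketch of the load-based approach using the Python client from the samples in this document (the bucket and file names are placeholders):

import uuid

from google.cloud import bigquery

bigquery_client = bigquery.Client()
table = bigquery_client.dataset('mydataset').table('mytable')

# Append rows from a CSV file in Cloud Storage to the existing table.
# WRITE_APPEND adds the new rows; the file's schema must match the table's.
job = bigquery_client.load_table_from_storage(
    str(uuid.uuid4()),  # unique job ID
    table,
    'gs://my-bucket/new-rows.csv')
job.write_disposition = 'WRITE_APPEND'
job.begin()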

Updating table schema

Once your table is created, you can update the schema by calling the tables.update or tables.patch methods. For example, using the BigQuery command-line tool: bq update -t <schema>. A sketch using the Python client follows the list below.

Allowed operations include:

  • Adding NULLABLE or REPEATED columns at the end
  • Making REQUIRED fields NULLABLE
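
For example, here is a minimal sketch of adding a NULLABLE column using the Python client from the samples in this document. It assumes your client version supports passing schema to Table.patch; the column name is illustrative:

from google.cloud import bigquery

bigquery_client = bigquery.Client()
table = bigquery_client.dataset('mydataset').table('mytable')
table.reload()  # fetch the current schema

# Append a new NULLABLE column at the end of the existing schema.
new_schema = list(table.schema)
new_schema.append(bigquery.SchemaField('nickname', 'STRING', mode='NULLABLE'))
table.patch(schema=new_schema)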

Browsing through table data

Browse through a table's data by calling bigquery.tabledata.list, specifying the name of the table and an optional row offset. This method lets you specify the maximum number of rows to return per page. If you request a row index beyond the last row, the method returns successfully, but without a rows property.

Values are returned wrapped in a JSON object that you must parse, as described in the reference documentation.

Tip: You can find out how many rows are in your table either by requesting one page of results and examining the totalRows property, or by running the query SELECT COUNT(*) FROM myProject:mydataset.myTable.

Browsing Through Table Data Examples

C#

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

public int TableDataList(
    string datasetId, string tableId, int pageSize, BigqueryClient client)
{
    int recordCount = 0;
    var result = client.ListRows(datasetId, tableId, null,
        new ListRowsOptions { PageSize = pageSize });
    // If there are more rows than were returned in the first page of results, 
    // iterating over the rows will lazily evaluate the results each time, 
    // making further requests as necessary.
    foreach (var row in result)
    {
        Console.WriteLine($"{row["title"]}: {row["unique_words"]}");
        recordCount++;
    }
    return recordCount;
}

Go

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

The Google Cloud Client Library for Go paginates automatically, so you do not need to implement pagination yourself. For example:

table := client.Dataset(datasetID).Table(tableID)
it := table.Read(ctx)
for {
	var row []bigquery.Value
	err := it.Next(&row)
	if err == iterator.Done {
		break
	}
	if err != nil {
		return err
	}
	fmt.Println(row)
}

Java

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Page<List<FieldValue>> tableData =
    bigquery.listTableData(datasetName, tableName, TableDataListOption.pageSize(100));
Iterator<List<FieldValue>> rowIterator = tableData.iterateAll();
while (rowIterator.hasNext()) {
  List<FieldValue> row = rowIterator.next();
  // do something with the row
}

Node.js

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

The Google Cloud Client Library for Node.js paginates automatically, so you do not need to implement pagination yourself. For example:

function browseRows (datasetId, tableId, projectId) {
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });

  // References an existing dataset, e.g. "my_dataset"
  const dataset = bigquery.dataset(datasetId);
  // References an existing table, e.g. "my_table"
  const table = dataset.table(tableId);

  // Lists rows in the table
  return table.getRows()
    .then((results) => {
      const rows = results[0];
      console.log('Rows:');
      rows.forEach((row) => console.log(row));
      return rows;
    });
}

PHP

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Pagination happens automatically in the Google Cloud Client Library for PHP using the generator function rows, which fetches the next page of results during iteration.

use Google\Cloud\BigQuery\BigQueryClient;

/**
 * Browse a bigquery table.
 * Example:
 * ```
 * browse_table($projectId, $datasetId, $tableId);
 * ```
 *
 * @param string $projectId  The Google project ID.
 * @param string $datasetId  The BigQuery dataset ID.
 * @param string $tableId    The BigQuery table ID.
 * @param int    $maxResults The number of results to return at a time.
 * @param int    $startIndex The row index to start on.
 *
 * @return int number of rows returned
 */
function browse_table($projectId, $datasetId, $tableId, $maxResults = 10, $startIndex = 0)
{
    $options = [
        'maxResults' => $maxResults,
        'startIndex' => $startIndex
    ];
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $table = $dataset->table($tableId);
    $numRows = 0;
    foreach ($table->rows($options) as $row) {
        print('---');
        foreach ($row as $column => $value) {
            printf('%s: %s' . PHP_EOL, $column, $value);
        }
        $numRows++;
    }

    return $numRows;
}

Python

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

def list_rows(dataset_name, table_name, project=None):
    """Prints rows in the given table.

    Prints at most 25 rows for brevity, as tables can contain large numbers
    of rows.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    if not table.exists():
        print('Table {}:{} does not exist.'.format(dataset_name, table_name))
        return

    # Reload the table so that the schema is available.
    table.reload()

    # Load at most 25 results. You can change the max_results argument to load
    # more rows from BigQuery, but note that this can take some time. It's
    # preferred to use a query.
    rows = list(table.fetch_data(max_results=25))

    # Use format to create a simple table.
    format_string = '{!s:<16} ' * len(table.schema)

    # Print schema field names
    field_names = [field.name for field in table.schema]
    print(format_string.format(*field_names))

    for row in rows:
        print(format_string.format(*row))

Ruby

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Pagination happens automatically in the Google Cloud Client Library for Ruby using Table#data and Data#next.

# project_id = "Your Google Cloud project ID"
# dataset_id = "ID of the dataset containing table"
# table_id   = "ID of the table to display data for"

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new project: project_id
dataset  = bigquery.dataset dataset_id
table    = dataset.table table_id

table.data.each do |row|
  row.each do |column_name, value|
    puts "#{column_name} = #{value}"
  end
end

Modifying or deleting rows or data

BigQuery tables are append-only. The query language does not currently support either updating or deleting data. To update or delete data, you must delete the table and then recreate it with new data. Alternatively, you can write a query that modifies the data and writes the results to a new table, as sketched below.
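
For example, here is a minimal sketch of the query-based approach using the Python client from the samples in this document; the table and column names are illustrative:

import uuid

from google.cloud import bigquery

bigquery_client = bigquery.Client()
destination = bigquery_client.dataset('mydataset').table('mytable_filtered')

# "Delete" rows by selecting every row except the unwanted ones into a
# results table, which can then replace the original.
query = (
    'SELECT * FROM [myproject:mydataset.mytable] '
    "WHERE status != 'obsolete'")
job = bigquery_client.run_async_query(str(uuid.uuid4()), query)
job.destination = destination
job.write_disposition = 'WRITE_TRUNCATE'
job.begin()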

Deleting a table

When you delete a table, the table and all its data are removed immediately.

Example

C#

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

public void DeleteTable(string datasetId, string tableId, BigqueryClient client)
{
    client.DeleteTable(_projectId, datasetId, tableId);
}

Go

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

table := client.Dataset(datasetID).Table(tableID)
if err := table.Delete(ctx); err != nil {
	return err
}

Java

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Boolean deleted = bigquery.delete(datasetName, tableName);
if (deleted) {
  // the table was deleted
} else {
  // the table was not found
}

Node.js

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

function deleteTable (datasetId, tableId, projectId) {
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });

  // References an existing dataset, e.g. "my_dataset"
  const dataset = bigquery.dataset(datasetId);
  // References an existing table, e.g. "my_table"
  const table = dataset.table(tableId);

  // Deletes the table
  return table.delete()
    .then(() => {
      console.log(`Table ${table.id} deleted.`);
    });
}

PHP

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

use Google\Cloud\BigQuery\BigQueryClient;

/**
 * @param string $projectId The Google project ID.
 * @param string $datasetId The BigQuery dataset ID.
 * @param string $tableId   The BigQuery table ID.
 */
function delete_table($projectId, $datasetId, $tableId)
{
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $table = $dataset->table($tableId);
    $table->delete();
}

Python

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

def delete_table(dataset_name, table_name, project=None):
    """Deletes a table in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    table.delete()

    print('Table {}:{} deleted.'.format(dataset_name, table_name))

Ruby

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

# project_id = "Your Google Cloud project ID"
# dataset_id = "ID of the dataset delete table from"
# table_id   = "ID of the table to delete"

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new project: project_id
dataset  = bigquery.dataset dataset_id
table    = dataset.table table_id

table.delete

puts "Deleted table: #{table_id}"

Undeleting a Table

It's possible to undelete a table within two days of its deletion by using table decorators and the copy command. Undeletions are performed on a best-effort basis and are not guaranteed. Tables from deleted datasets, and deleted tables that have been replaced by a new table of the same name, cannot be undeleted.

To undelete a table, determine a UNIX timestamp, in milliseconds, of a time when the table still existed. Then copy the table as of that time to a new table. The new table must have a different name than the deleted table.

For example, the following statement copies mydataset.mytable at the time 1418864998000 into a new table mydataset.newtable:

bq cp mydataset.mytable@1418864998000 mydataset.newtable
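
One way to compute a suitable millisecond timestamp (here, one hour in the past, purely as an illustration) is:

import time

# A snapshot time at which the table still existed, as a UNIX timestamp
# in milliseconds. Must be within the last two days.
snapshot_ms = int((time.time() - 60 * 60) * 1000)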

Copying an existing table

There are three ways to copy a table: using the web UI, using the command-line tool, or programmatically through the API.

To copy a table using the API:

You can copy an existing table through the API by calling the bigquery.jobs.insert method, and configuring a copy job. You must specify the following in your job configuration:

"copy": {
      "sourceTable": {       // Required
        "projectId": string, // Required
        "datasetId": string, // Required
        "tableId": string    // Required
      },
      "destinationTable": {  // Required
        "projectId": string, // Required
        "datasetId": string, // Required
        "tableId": string    // Required
      },
      "createDisposition": string,  // Optional
      "writeDisposition": string,   // Optional
    },

where sourceTable provides information about the table to be copied, destinationTable provides information about the new table, createDisposition specifies whether to create the table if it doesn't exist, and writeDisposition specifies whether to overwrite or append to an existing table.

Example

C#

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

public void CopyTable(
    string datasetId, string tableIdToBeCopied, string newTableId, BigqueryClient client)
{
    var table = client.GetTable(datasetId, tableIdToBeCopied);
    string query = $"SELECT * FROM {table}";
    var destination = client.GetTableReference(datasetId, newTableId);
    BigqueryJob job = client.CreateQueryJob(query,
        new CreateQueryJobOptions { DestinationTable = destination });
    // Wait for the job to complete.
    job.PollQueryUntilCompleted();
}

Go

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

dataset := client.Dataset(datasetID)
copier := dataset.Table(dstID).CopierFrom(dataset.Table(srcID))
copier.WriteDisposition = bigquery.WriteTruncate
job, err := copier.Run(ctx)
if err != nil {
	return err
}
status, err := job.Wait(ctx)
if err != nil {
	return err
}
if err := status.Err(); err != nil {
	return err
}

Java

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

Job job = table.copy(datasetName, tableName);
// Wait for the job to complete.
try {
  Job completedJob = job.waitFor(WaitForOption.checkEvery(1, TimeUnit.SECONDS),
      WaitForOption.timeout(3, TimeUnit.MINUTES));
  if (completedJob != null && completedJob.getStatus().getError() == null) {
    // Job completed successfully
  } else {
    // Handle error case
  }
} catch (InterruptedException | TimeoutException e) {
  // Handle interrupted wait
}

Node.js

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

function copyTable (srcDatasetId, srcTableId, destDatasetId, destTableId, projectId) {
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });

  // References the source dataset, e.g. "my_dataset"
  const srcDataset = bigquery.dataset(srcDatasetId);
  // References the source table, e.g. "my_table"
  const srcTable = srcDataset.table(srcTableId);
  // References the destination dataset, e.g. "my_other_dataset"
  const destDataset = bigquery.dataset(destDatasetId);
  // References the destination table, e.g. "my_other_table"
  const destTable = destDataset.table(destTableId);

  let job;

  // Copies the table contents into another table
  return srcTable.copy(destTable)
    .then((results) => {
      job = results[0];
      console.log(`Job ${job.id} started.`);
      return job.promise();
    })
    .then((results) => {
      console.log(`Job ${job.id} completed.`);
      return results;
    });
}

PHP

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

use Google\Cloud\BigQuery\BigQueryClient;
use Google\Cloud\ExponentialBackoff;

/**
 * @param string $projectId             The Google project ID.
 * @param string $datasetId             The BigQuery dataset ID.
 * @param string $sourceTableId         The BigQuery table ID to copy from.
 * @param string $destinationTableId    The BigQuery table ID to copy to.
 */
function copy_table($projectId, $datasetId, $sourceTableId, $destinationTableId)
{
    $bigQuery = new BigQueryClient([
        'projectId' => $projectId,
    ]);
    $dataset = $bigQuery->dataset($datasetId);
    $sourceTable = $dataset->table($sourceTableId);
    $destinationTable = $dataset->table($destinationTableId);
    $job = $sourceTable->copy($destinationTable);
    // poll the job until it is complete
    $backoff = new ExponentialBackoff(10);
    $backoff->execute(function () use ($job) {
        print('Waiting for job to complete' . PHP_EOL);
        $job->reload();
        if (!$job->isComplete()) {
            throw new Exception('Job has not yet completed', 500);
        }
    });
    // check if the job has errors
    if (isset($job->info()['status']['errorResult'])) {
        $error = $job->info()['status']['errorResult']['message'];
        printf('Error running job: %s' . PHP_EOL, $error);
    } else {
        print('Table copied successfully' . PHP_EOL);
    }
}

Python

For more on installing and creating a BigQuery client, refer to BigQuery Client Libraries.

def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copies a table.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    # This sample shows the destination table in the same dataset and project,
    # however, it's possible to copy across datasets and projects. You can
    # also copy multiple source tables into a single destination table by
    # providing additional arguments to `copy_table`.
    destination_table = dataset.table(new_table_name)

    # Create a job to copy the table to the destination table.
    job_id = str(uuid.uuid4())
    job = bigquery_client.copy_table(
        job_id, destination_table, table)

    # Create the table if it doesn't exist.
    job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    # Start the job.
    job.begin()

    # Wait for the job to finish.
    print('Waiting for job to finish...')
    wait_for_job(job)

    print('Table {} copied to {}.'.format(table_name, new_table_name))
