Relax a column in a query append job

Change a column from required to nullable in a query append job.

Documentation pages that include this code sample

To view the code sample used in context, see the following documentation:

Code sample

Go

Before trying this sample, follow the Go setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Go API reference documentation.

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery"
)

// relaxTableQuery demonstrates relaxing the schema of a table by appending query results to
// enable the table to allow NULL values.
func relaxTableQuery(projectID, datasetID, tableID string) error {
	// projectID := "my-project-id"
	// datasetID := "mydataset"
	// tableID := "mytable"
	ctx := context.Background()
	client, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %v", err)
	}
	defer client.Close()

	sampleSchema := bigquery.Schema{
		{Name: "full_name", Type: bigquery.StringFieldType, Required: true},
		{Name: "age", Type: bigquery.IntegerFieldType, Required: true},
	}
	meta := &bigquery.TableMetadata{
		Schema: sampleSchema,
	}
	tableRef := client.Dataset(datasetID).Table(tableID)
	if err := tableRef.Create(ctx, meta); err != nil {
		return err
	}
	// Now, append a query result that includes nulls, but allow the job to relax
	// all required columns.
	q := client.Query("SELECT \"Beyonce\" as full_name")
	q.QueryConfig.Dst = client.Dataset(datasetID).Table(tableID)
	q.SchemaUpdateOptions = []string{"ALLOW_FIELD_RELAXATION"}
	q.WriteDisposition = bigquery.WriteAppend
	q.Location = "US"
	job, err := q.Run(ctx)
	if err != nil {
		return err
	}
	_, err = job.Wait(ctx)
	if err != nil {
		return err
	}
	return nil
}

Java

Before trying this sample, follow the Java setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Java API reference documentation.

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableResult;
import com.google.common.collect.ImmutableList;

public class RelaxTableQuery {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";
    relaxTableQuery(projectId, datasetName, tableName);
  }

  // To relax all columns in a destination table when you append data to it during a query job
  public static void relaxTableQuery(String projectId, String datasetName, String tableName)
      throws Exception {
    try {
      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests.
      BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

      TableId tableId = TableId.of(datasetName, tableName);

      String sourceTable = "`" + projectId + "." + datasetName + "." + tableName + "`";
      String query = "SELECT word FROM " + sourceTable + " WHERE word like '%is%'";

      QueryJobConfiguration queryConfig =
          QueryJobConfiguration.newBuilder(query)
              // Use standard SQL syntax for queries.
              // See: https://cloud.google.com/bigquery/sql-reference/
              .setUseLegacySql(false)
              .setSchemaUpdateOptions(ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_RELAXATION))
              .setWriteDisposition(WriteDisposition.WRITE_APPEND)
              .setDestinationTable(tableId)
              .build();

      Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).build());

      queryJob = queryJob.waitFor();

      // Check for errors
      if (queryJob == null) {
        throw new Exception("Job no longer exists");
      } else if (queryJob.getStatus().getError() != null) {
        // You can also look at queryJob.getStatus().getExecutionErrors() for all
        // errors, not just the latest one.
        throw new Exception(queryJob.getStatus().getError().toString());
      }

      // Get the results.
      TableResult results = queryJob.getQueryResults();

      // Print all pages of the results.
      results
          .iterateAll()
          .forEach(
              rows -> {
                rows.forEach(row -> System.out.println("row: " + row.toString()));
              });

      System.out.println("Successfully relaxed all columns in destination table during query job");
    } catch (BigQueryException | InterruptedException e) {
      System.out.println("Columns not relaxed during query job \n" + e.toString());
    }
  }
}

Node.js

Before trying this sample, follow the Node.js setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Node.js API reference documentation.

const {BigQuery} = require('@google-cloud/bigquery');
const bigquery = new BigQuery();

async function relaxColumnQueryAppend() {
  // Change required to null in query append job

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const projectId = "my_project"
  // const datasetId = "my_dataset"
  // const tableId = "my_table"

  // Retrieve the destination table and checks the number of required fields.
  const dataset = await bigquery.dataset(datasetId);
  const table = await dataset.table(tableId);
  const [metaData] = await table.getMetadata();

  const requiredFields = metaData.schema.fields.filter(
    ({mode}) => mode === 'REQUIRED'
  ).length;

  console.log(`${requiredFields} fields in the schema are required.`);

  // Create destination table reference
  const tableRef = {
    projectId,
    tableId,
    datasetId,
  };

  /* Configure the query to append the results to a destination table,
   * allowing field relaxation. In this example, the existing table
   * contains 'age' as a required column.
   *
   * For all options, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery
   */
  const queryJobConfig = {
    query: `SELECT age FROM \`${projectId}.${datasetId}.${tableId}\``,
    destinationTable: tableRef,
    schemaUpdateOptions: ['ALLOW_FIELD_RELAXATION'],
    writeDisposition: ['WRITE_APPEND'],
    useLegacySql: false,
  };

  // Configure the job.
  const jobConfig = {
    configuration: {
      query: queryJobConfig,
    },
  };

  // Start the query, passing in the extra configuration.
  const response = await bigquery.createJob(jobConfig);
  const job = response[0];

  // Wait for job to complete.
  await job.getQueryResults(job);

  // Check the updated number of required fields.
  const updatedTable = await dataset.table(tableId);
  const [updatedMetaData] = await updatedTable.getMetadata();

  const updatedRequiredFields = updatedMetaData.schema.fields.filter(
    ({mode}) => mode === 'REQUIRED'
  ).length;

  console.log(
    `${updatedRequiredFields} fields in the schema are now required.`
  );
}

Python

Before trying this sample, follow the Python setup instructions in the BigQuery Quickstart Using Client Libraries. For more information, see the BigQuery Python API reference documentation.

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the destination table.
# table_id = "your-project.your_dataset.your_table_name"

# Retrieves the destination table and checks the number of required fields.
table = client.get_table(table_id)  # Make an API request.
original_required_fields = sum(field.mode == "REQUIRED" for field in table.schema)

# In this example, the existing table has 2 required fields.
print("{} fields in the schema are required.".format(original_required_fields))

# Configures the query to append the results to a destination table,
# allowing field relaxation.
job_config = bigquery.QueryJobConfig(
    destination=table_id,
    schema_update_options=[bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION],
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
)

# Start the query, passing in the extra configuration.
query_job = client.query(
    # In this example, the existing table contains 'full_name' and 'age' as
    # required columns, but the query results will omit the second column.
    'SELECT "Beyonce" as full_name;',
    job_config=job_config,
)  # Make an API request.
query_job.result()  # Wait for the job to complete.

# Checks the updated number of required fields.
table = client.get_table(table_id)  # Make an API request.
current_required_fields = sum(field.mode == "REQUIRED" for field in table.schema)
print("{} fields in the schema are now required.".format(current_required_fields))