将 Cloud Storage 中的 Avro 文件加载到新表中。
包含此代码示例的文档页面
如需查看上下文中使用的代码示例,请参阅以下文档:
代码示例
Java
试用此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Java 设置说明进行操作。 如需了解详情,请参阅 BigQuery Java API 参考文档。
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableId;
// Sample to load Avro data from Cloud Storage into a new BigQuery table
public class LoadAvroFromGcs {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro";
loadAvroFromGcs(datasetName, tableName, sourceUri);
}
public static void loadAvroFromGcs(String datasetName, String tableName, String sourceUri) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
TableId tableId = TableId.of(datasetName, tableName);
LoadJobConfiguration loadConfig =
LoadJobConfiguration.of(tableId, sourceUri, FormatOptions.avro());
// Load data from a GCS Avro file into the table
Job job = bigquery.create(JobInfo.of(loadConfig));
// Blocks until this load table job completes its execution, either failing or succeeding.
job = job.waitFor();
if (job.isDone()) {
System.out.println("Avro from GCS successfully loaded in a table");
} else {
System.out.println(
"BigQuery was unable to load into the table due to an error:"
+ job.getStatus().getError());
}
} catch (BigQueryException | InterruptedException e) {
System.out.println("Column not added during load append \n" + e.toString());
}
}
}
Node.js
在尝试此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Node.js 设置说明进行操作。如需了解详情,请参阅 BigQuery Node.js API 参考文档。
// Import the Google Cloud client libraries
const {BigQuery} = require('@google-cloud/bigquery');
const {Storage} = require('@google-cloud/storage');
// Instantiate clients
const bigquery = new BigQuery();
const storage = new Storage();
/**
* This sample loads the Avro file at
* https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.avro
*
* TODO(developer): Replace the following lines with the path to your file.
*/
const bucketName = 'cloud-samples-data';
const filename = 'bigquery/us-states/us-states.avro';
async function loadTableGCSAvro() {
// Imports a GCS file into a table with Avro source format.
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const datasetId = 'my_dataset';
// const tableId = 'us_states';
// Configure the load job. For full list of options, see:
// https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad
const jobConfigurationLoad = {
sourceFormat: 'AVRO',
};
// Load data from a Google Cloud Storage file into the table
const [job] = await bigquery
.dataset(datasetId)
.table(tableId)
.load(storage.bucket(bucketName).file(filename), jobConfigurationLoad);
// load() waits for the job to finish
console.log(`Job ${job.id} completed.`);
// Check the job's status for errors
const errors = job.status.errors;
if (errors && errors.length > 0) {
throw errors;
}
}
Python
在尝试此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Python 设置说明进行操作。如需了解详情,请参阅 BigQuery Python API 参考文档。
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name
job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.AVRO)
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"
load_job = client.load_table_from_uri(
uri, table_id, job_config=job_config
) # Make an API request.
load_job.result() # Waits for the job to complete.
destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))