通过创建永久表查询 Cloud Storage 上的文件的数据。
深入探索
如需查看包含此代码示例的详细文档,请参阅以下内容:
代码示例
Java
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Java 设置说明进行操作。如需了解详情,请参阅 BigQuery Java API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为客户端库设置身份验证。
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.TableResult;
// Sample to queries an external data source using a permanent table
public class QueryExternalGcsPerm {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
String query =
String.format("SELECT * FROM %s.%s WHERE name LIKE 'W%%'", datasetName, tableName);
queryExternalGcsPerm(datasetName, tableName, sourceUri, schema, query);
}
public static void queryExternalGcsPerm(
String datasetName, String tableName, String sourceUri, Schema schema, String query) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Skip header row in the file.
CsvOptions csvOptions = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
TableId tableId = TableId.of(datasetName, tableName);
// Create a permanent table linked to the GCS file
ExternalTableDefinition externalTable =
ExternalTableDefinition.newBuilder(sourceUri, csvOptions).setSchema(schema).build();
bigquery.create(TableInfo.of(tableId, externalTable));
// Example query to find states starting with 'W'
TableResult results = bigquery.query(QueryJobConfiguration.of(query));
results
.iterateAll()
.forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));
System.out.println("Query on external permanent table performed successfully.");
} catch (BigQueryException | InterruptedException e) {
System.out.println("Query not performed \n" + e.toString());
}
}
}
Node.js
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Node.js 设置说明进行操作。如需了解详情,请参阅 BigQuery Node.js API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为客户端库设置身份验证。
// Import the Google Cloud client library and create a client
const {BigQuery} = require('@google-cloud/bigquery');
const bigquery = new BigQuery();
async function queryExternalGCSPerm() {
// Queries an external data source using a permanent table
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const datasetId = "my_dataset";
// const tableId = "my_table";
// Configure the external data source
const dataConfig = {
sourceFormat: 'CSV',
sourceUris: ['gs://cloud-samples-data/bigquery/us-states/us-states.csv'],
// Optionally skip header row
csvOptions: {skipLeadingRows: 1},
};
// For all options, see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
const options = {
schema: schema,
externalDataConfiguration: dataConfig,
};
// Create an external table linked to the GCS file
const [table] = await bigquery
.dataset(datasetId)
.createTable(tableId, options);
console.log(`Table ${table.id} created.`);
// Example query to find states starting with 'W'
const query = `SELECT post_abbr
FROM \`${datasetId}.${tableId}\`
WHERE name LIKE 'W%'`;
// Run the query as a job
const [job] = await bigquery.createQueryJob(query);
console.log(`Job ${job.id} started.`);
// Wait for the query to finish
const [rows] = await job.getQueryResults();
// Print the results
console.log('Rows:');
console.log(rows);
}
Python
试用此示例之前,请按照 BigQuery 快速入门:使用客户端库中的 Python 设置说明进行操作。如需了解详情,请参阅 BigQuery Python API 参考文档。
如需向 BigQuery 进行身份验证,请设置应用默认凭据。 如需了解详情,请参阅为客户端库设置身份验证。
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table to create.
table_id = "your-project.your_dataset.your_table_name"
# TODO(developer): Set the external source format of your table.
# Note that the set of allowed values for external data sources is
# different than the set used for loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`).
external_source_format = "AVRO"
# TODO(developer): Set the source_uris to point to your data in Google Cloud
source_uris = [
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
]
# Create ExternalConfig object with external source format
external_config = bigquery.ExternalConfig(external_source_format)
# Set source_uris that point to your data in Google Cloud
external_config.source_uris = source_uris
# TODO(developer) You have the option to set a reference_file_schema_uri, which points to
# a reference file for the table schema
reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro"
external_config.reference_file_schema_uri = reference_file_schema_uri
table = bigquery.Table(table_id)
# Set the external data configuration of the table
table.external_data_configuration = external_config
table = client.create_table(table) # Make an API request.
print(
f"Created table with external source format {table.external_data_configuration.source_format}"
)
后续步骤
如需搜索和过滤其他 Google Cloud 产品的代码示例,请参阅 Google Cloud 示例浏览器。