通过创建永久表查询 Cloud Storage 上的文件的数据。
包含此代码示例的文档页面
如需查看上下文中使用的代码示例,请参阅以下文档:
代码示例
Java
试用此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Java 设置说明进行操作。 如需了解详情,请参阅 BigQuery Java API 参考文档。
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.TableResult;
// Sample to queries an external data source using a permanent table
public class QueryExternalGcsPerm {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv";
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
String query =
String.format("SELECT * FROM %s.%s WHERE name LIKE 'W%%'", datasetName, tableName);
queryExternalGcsPerm(datasetName, tableName, sourceUri, schema, query);
}
public static void queryExternalGcsPerm(
String datasetName, String tableName, String sourceUri, Schema schema, String query) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Skip header row in the file.
CsvOptions csvOptions = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
TableId tableId = TableId.of(datasetName, tableName);
// Create a permanent table linked to the GCS file
ExternalTableDefinition externalTable =
ExternalTableDefinition.newBuilder(sourceUri, csvOptions).setSchema(schema).build();
bigquery.create(TableInfo.of(tableId, externalTable));
// Example query to find states starting with 'W'
TableResult results = bigquery.query(QueryJobConfiguration.of(query));
results
.iterateAll()
.forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));
System.out.println("Query on external permanent table performed successfully.");
} catch (BigQueryException | InterruptedException e) {
System.out.println("Query not performed \n" + e.toString());
}
}
}
Node.js
在尝试此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Node.js 设置说明进行操作。如需了解详情,请参阅 BigQuery Node.js API 参考文档。
// Import the Google Cloud client library and create a client
const {BigQuery} = require('@google-cloud/bigquery');
const bigquery = new BigQuery();
async function queryExternalGCSPerm() {
// Queries an external data source using a permanent table
/**
* TODO(developer): Uncomment the following lines before running the sample.
*/
// const datasetId = "my_dataset";
// const tableId = "my_table";
// Configure the external data source
const dataConfig = {
sourceFormat: 'CSV',
sourceUris: ['gs://cloud-samples-data/bigquery/us-states/us-states.csv'],
// Optionally skip header row
csvOptions: {skipLeadingRows: 1},
};
// For all options, see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource
const options = {
schema: schema,
externalDataConfiguration: dataConfig,
};
// Create an external table linked to the GCS file
const [table] = await bigquery
.dataset(datasetId)
.createTable(tableId, options);
console.log(`Table ${table.id} created.`);
// Example query to find states starting with 'W'
const query = `SELECT post_abbr
FROM \`${datasetId}.${tableId}\`
WHERE name LIKE 'W%'`;
// Run the query as a job
const [job] = await bigquery.createQueryJob(query);
console.log(`Job ${job.id} started.`);
// Wait for the query to finish
const [rows] = await job.getQueryResults();
// Print the results
console.log('Rows:');
console.log(rows);
}
Python
在尝试此示例之前,请按照《BigQuery 快速入门:使用客户端库》中的 Python 设置说明进行操作。如需了解详情,请参阅 BigQuery Python API 参考文档。
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
# Configure the external data source
dataset_ref = bigquery.DatasetReference(project, dataset_id)
table_id = "us_states"
schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
]
table = bigquery.Table(dataset_ref.table(table_id), schema=schema)
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [
"gs://cloud-samples-data/bigquery/us-states/us-states.csv"
]
external_config.options.skip_leading_rows = 1 # optionally skip header row
table.external_data_configuration = external_config
# Create a permanent table linked to the GCS file
table = client.create_table(table) # API request
# Example query to find states starting with 'W'
sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id)
query_job = client.query(sql) # API request
w_states = list(query_job) # Waits for query to finish
print("There are {} states with names starting with W.".format(len(w_states)))