Import data into a dataset

Stay organized with collections Save and categorize content based on your preferences.

Creates a dataset and then imports data into the dataset.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample


import java.util.Arrays;
import java.util.concurrent.ExecutionException;

class TablesImportDataset {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String path = "gs://BUCKET_ID/path/to//data.csv or bq://project_id.dataset_id.table_id";
    importDataset(projectId, datasetId, path);

  // Import a dataset via BigQuery or Google Cloud Storage
  static void importDataset(String projectId, String datasetId, String path)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);

      InputConfig.Builder inputConfigBuilder = InputConfig.newBuilder();

      // Determine which source type was used for the input path (BigQuery or GCS)
      if (path.startsWith("bq")) {
        // Get training data file to be imported from a BigQuery source.
        BigQuerySource.Builder bigQuerySource = BigQuerySource.newBuilder();
      } else {
        // Get multiple Google Cloud Storage URIs to import data from
        GcsSource gcsSource =

      // Import data from the input URI
      System.out.println("Processing import...");

      Empty response = client.importDataAsync(datasetFullId,;
      System.out.format("Dataset imported. %s%n", response);


const automl = require('@google-cloud/automl');
const client = new automl.v1beta1.AutoMlClient();

 * Demonstrates using the AutoML client to import data.
 * TODO(developer): Uncomment the following lines before running the sample.
// const projectId = '[PROJECT_ID]' e.g., "my-gcloud-project";
// const computeRegion = '[REGION_NAME]' e.g., "us-central1";
// const datasetId = '[DATASET_ID]' e.g., "TBL2246891593778855936";
// const path = '[GCS_PATH]' | '[BIGQUERY_PATH]'
// e.g., "gs://<bucket-name>/<csv file>" or
// "bq://<project_id>.<dataset_id>.<table_id>",
// `string or array of paths in AutoML Tables format`;

// Get the full path of the dataset.
const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

let inputConfig = {};
if (path.startsWith('bq')) {
  // Get Bigquery URI.
  inputConfig = {
    bigquerySource: {
      inputUri: path,
} else {
  // Get the multiple Google Cloud Storage URIs.
  const inputUris = path.split(',');
  inputConfig = {
    gcsSource: {
      inputUris: inputUris,

// Import the dataset from the input URI.
  .importData({name: datasetFullId, inputConfig: inputConfig})
  .then(responses => {
    const operation = responses[0];
    console.log('Processing import...');
    return operation.promise();
  .then(responses => {
    // The final result of the operation.
    const operationDetails = responses[2];

    // Get the data import details.
    console.log('Data import details:');
    console.log('\tOperation details:');
    console.log(`\t\tName: ${}`);
    console.log(`\t\tDone: ${operationDetails.done}`);
  .catch(err => {


# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# dataset_display_name = 'DATASET_DISPLAY_NAME'
# path = 'gs://path/to/file.csv' or 'bq://project_id.dataset.table_id'

from import automl_v1beta1 as automl

client = automl.TablesClient(project=project_id, region=compute_region)

response = None
if path.startswith("bq"):
    response = client.import_data(
        dataset_display_name=dataset_display_name, bigquery_input_uri=path
    # Get the multiple Google Cloud Storage URIs.
    input_uris = path.split(",")
    response = client.import_data(

print("Processing import...")
# synchronous check of operation status.
print("Data imported. {}".format(response.result()))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.