This legacy version of AutoML Tables is deprecated and will no longer be available on Google Cloud after January 23, 2024. All the functionality of legacy AutoML Tables and new features are available on the Vertex AI platform. See Migrate to Vertex AI to learn how to migrate your resources.

Import data into a dataset

Stay organized with collections Save and categorize content based on your preferences.

Creates a dataset and then imports data into the dataset.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

import com.google.cloud.automl.v1beta1.AutoMlClient;
import com.google.cloud.automl.v1beta1.BigQuerySource;
import com.google.cloud.automl.v1beta1.DatasetName;
import com.google.cloud.automl.v1beta1.GcsSource;
import com.google.cloud.automl.v1beta1.InputConfig;
import com.google.protobuf.Empty;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.ExecutionException;

class TablesImportDataset {

  public static void main(String[] args)
      throws IOException, ExecutionException, InterruptedException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "YOUR_PROJECT_ID";
    String datasetId = "YOUR_DATASET_ID";
    String path = "gs://BUCKET_ID/path/to//data.csv or bq://project_id.dataset_id.table_id";
    importDataset(projectId, datasetId, path);
  }

  // Import a dataset via BigQuery or Google Cloud Storage
  static void importDataset(String projectId, String datasetId, String path)
      throws IOException, ExecutionException, InterruptedException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AutoMlClient client = AutoMlClient.create()) {
      // Get the complete path of the dataset.
      DatasetName datasetFullId = DatasetName.of(projectId, "us-central1", datasetId);

      InputConfig.Builder inputConfigBuilder = InputConfig.newBuilder();

      // Determine which source type was used for the input path (BigQuery or GCS)
      if (path.startsWith("bq")) {
        // Get training data file to be imported from a BigQuery source.
        BigQuerySource.Builder bigQuerySource = BigQuerySource.newBuilder();
        bigQuerySource.setInputUri(path);
        inputConfigBuilder.setBigquerySource(bigQuerySource);
      } else {
        // Get multiple Google Cloud Storage URIs to import data from
        GcsSource gcsSource =
            GcsSource.newBuilder().addAllInputUris(Arrays.asList(path.split(","))).build();
        inputConfigBuilder.setGcsSource(gcsSource);
      }

      // Import data from the input URI
      System.out.println("Processing import...");

      Empty response = client.importDataAsync(datasetFullId, inputConfigBuilder.build()).get();
      System.out.format("Dataset imported. %s%n", response);
    }
  }
}

Node.js

const automl = require('@google-cloud/automl');
const client = new automl.v1beta1.AutoMlClient();

/**
 * Demonstrates using the AutoML client to import data.
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// const projectId = '[PROJECT_ID]' e.g., "my-gcloud-project";
// const computeRegion = '[REGION_NAME]' e.g., "us-central1";
// const datasetId = '[DATASET_ID]' e.g., "TBL2246891593778855936";
// const path = '[GCS_PATH]' | '[BIGQUERY_PATH]'
// e.g., "gs://<bucket-name>/<csv file>" or
// "bq://<project_id>.<dataset_id>.<table_id>",
// `string or array of paths in AutoML Tables format`;

// Get the full path of the dataset.
const datasetFullId = client.datasetPath(projectId, computeRegion, datasetId);

let inputConfig = {};
if (path.startsWith('bq')) {
  // Get Bigquery URI.
  inputConfig = {
    bigquerySource: {
      inputUri: path,
    },
  };
} else {
  // Get the multiple Google Cloud Storage URIs.
  const inputUris = path.split(',');
  inputConfig = {
    gcsSource: {
      inputUris: inputUris,
    },
  };
}

// Import the dataset from the input URI.
client
  .importData({name: datasetFullId, inputConfig: inputConfig})
  .then(responses => {
    const operation = responses[0];
    console.log('Processing import...');
    return operation.promise();
  })
  .then(responses => {
    // The final result of the operation.
    const operationDetails = responses[2];

    // Get the data import details.
    console.log('Data import details:');
    console.log('\tOperation details:');
    console.log(`\t\tName: ${operationDetails.name}`);
    console.log(`\t\tDone: ${operationDetails.done}`);
  })
  .catch(err => {
    console.error(err);
  });

Python

# TODO(developer): Uncomment and set the following variables
# project_id = 'PROJECT_ID_HERE'
# compute_region = 'COMPUTE_REGION_HERE'
# dataset_display_name = 'DATASET_DISPLAY_NAME'
# path = 'gs://path/to/file.csv' or 'bq://project_id.dataset.table_id'

from google.cloud import automl_v1beta1 as automl

client = automl.TablesClient(project=project_id, region=compute_region)

response = None
if path.startswith("bq"):
    response = client.import_data(
        dataset_display_name=dataset_display_name, bigquery_input_uri=path
    )
else:
    # Get the multiple Google Cloud Storage URIs.
    input_uris = path.split(",")
    response = client.import_data(
        dataset_display_name=dataset_display_name,
        gcs_input_uris=input_uris,
    )

print("Processing import...")
# synchronous check of operation status.
print("Data imported. {}".format(response.result()))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.