Import data

Stay organized with collections Save and categorize content based on your preferences.

Imports data using the import_data method.

Code sample

Node.js

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Node.js API reference documentation.

/**
 * TODO(developer): Uncomment these variables before running the sample.\
 */

// const datasetId = "YOUR_DATASET_ID";
// const gcsSourceUri = "YOUR_GCS_SOURCE_URI";
// eg. "gs://<your-gcs-bucket>/<import_source_path>/[file.csv/file.jsonl]"
// const importSchemaUri = "YOUR_IMPORT_SCHEMA_URI";
// const project = "YOUR_PROJECT_ID";
// const location = 'YOUR_PROJECT_LOCATION';

// Imports the Google Cloud Dataset Service Client library
const {DatasetServiceClient} = require('@google-cloud/aiplatform');

// Specifies the location of the api endpoint
const clientOptions = {
  apiEndpoint: 'us-central1-aiplatform.googleapis.com',
};
const datasetServiceClient = new DatasetServiceClient(clientOptions);

async function importData() {
  const name = datasetServiceClient.datasetPath(project, location, datasetId);
  // Here we use only one import config with one source
  const importConfigs = [
    {
      gcsSource: {uris: [gcsSourceUri]},
      importSchemaUri: importSchemaUri,
    },
  ];
  const request = {
    name,
    importConfigs,
  };

  // Create Import Data Request
  const [response] = await datasetServiceClient.importData(request);
  console.log(`Long running operation : ${response.name}`);

  // Wait for operation to complete
  await response.promise();

  console.log(`Import data response : ${JSON.stringify(response.result)}`);
}
importData();

Python

To learn how to install and use the client library for Vertex AI, see Vertex AI client libraries. For more information, see the Vertex AI Python API reference documentation.

from google.cloud import aiplatform


def import_data_sample(
    project: str,
    dataset_id: str,
    gcs_source_uri: str,
    import_schema_uri: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 1800,
):
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.DatasetServiceClient(client_options=client_options)
    # Here we use only one import config with one source
    import_configs = [
        {
            "gcs_source": {"uris": [gcs_source_uri]},
            "import_schema_uri": import_schema_uri,
        }
    ]
    name = client.dataset_path(project=project, location=location, dataset=dataset_id)
    response = client.import_data(name=name, import_configs=import_configs)
    print("Long running operation:", response.operation.name)
    import_data_response = response.result(timeout=timeout)
    print("import_data_response:", import_data_response)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.