Managed I/O for Apache Iceberg supports dynamic destinations. Instead of
writing to a single fixed table, the connector can dynamically select a
destination table based on field values within the incoming records.
To use dynamic destinations, provide a template for the table configuration
parameter. For more information, see
Dynamic destinations.
Examples
The following examples show how to use Managed I/O to write to
Apache Iceberg.
Write to an Apache Iceberg table
The following example writes in-memory JSON data to an Apache Iceberg table.
importcom.google.common.collect.ImmutableMap;importjava.util.Arrays;importjava.util.List;importjava.util.Map;importorg.apache.beam.sdk.Pipeline;importorg.apache.beam.sdk.managed.Managed;importorg.apache.beam.sdk.options.Description;importorg.apache.beam.sdk.options.PipelineOptions;importorg.apache.beam.sdk.options.PipelineOptionsFactory;importorg.apache.beam.sdk.schemas.Schema;importorg.apache.beam.sdk.transforms.Create;importorg.apache.beam.sdk.transforms.JsonToRow;importorg.apache.beam.sdk.values.PCollectionRowTuple;publicclassApacheIcebergWrite{staticfinalList<String>TABLE_ROWS=Arrays.asList("{\"id\":0, \"name\":\"Alice\"}","{\"id\":1, \"name\":\"Bob\"}","{\"id\":2, \"name\":\"Charles\"}");staticfinalStringCATALOG_TYPE="hadoop";// The schema for the table rows.publicstaticfinalSchemaSCHEMA=newSchema.Builder().addStringField("name").addInt64Field("id").build();publicinterfaceOptionsextendsPipelineOptions{@Description("The URI of the Apache Iceberg warehouse location")StringgetWarehouseLocation();voidsetWarehouseLocation(Stringvalue);@Description("The name of the Apache Iceberg catalog")StringgetCatalogName();voidsetCatalogName(Stringvalue);@Description("The name of the table to write to")StringgetTableName();voidsetTableName(Stringvalue);}publicstaticvoidmain(String[]args){// Parse the pipeline options passed into the application. Example:// --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \// --tableName= $TABLE_NAME// For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-optionsOptionsoptions=PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);Pipelinepipeline=Pipeline.create(options);// Configure the Iceberg source I/OMapcatalogConfig=ImmutableMap.<String,Object>builder().put("warehouse",options.getWarehouseLocation()).put("type",CATALOG_TYPE).build();ImmutableMap<String,Object>config=ImmutableMap.<String,Object>builder().put("table",options.getTableName()).put("catalog_name",options.getCatalogName()).put("catalog_properties",catalogConfig).build();// Build the pipeline.pipeline.apply(Create.of(TABLE_ROWS)).apply(JsonToRow.withSchema(SCHEMA)).apply(Managed.write(Managed.ICEBERG).withConfig(config));pipeline.run().waitUntilFinish();}}
Write with dynamic destinations
The following example writes to different Apache Iceberg tables based on a
field in the input data.
importcom.google.common.collect.ImmutableMap;importjava.util.Arrays;importjava.util.List;importjava.util.Map;importorg.apache.beam.sdk.Pipeline;importorg.apache.beam.sdk.PipelineResult;importorg.apache.beam.sdk.managed.Managed;importorg.apache.beam.sdk.options.Description;importorg.apache.beam.sdk.options.PipelineOptions;importorg.apache.beam.sdk.options.PipelineOptionsFactory;importorg.apache.beam.sdk.schemas.Schema;importorg.apache.beam.sdk.transforms.Create;importorg.apache.beam.sdk.transforms.JsonToRow;publicclassApacheIcebergDynamicDestinations{// The schema for the table rows.publicstaticfinalSchemaSCHEMA=newSchema.Builder().addInt64Field("id").addStringField("name").addStringField("airport").build();// The data to write to table, formatted as JSON strings.staticfinalList<String>TABLE_ROWS=List.of("{\"id\":0, \"name\":\"Alice\", \"airport\": \"ORD\" }","{\"id\":1, \"name\":\"Bob\", \"airport\": \"SYD\" }","{\"id\":2, \"name\":\"Charles\", \"airport\": \"ORD\" }");publicinterfaceOptionsextendsPipelineOptions{@Description("The URI of the Apache Iceberg warehouse location")StringgetWarehouseLocation();voidsetWarehouseLocation(Stringvalue);@Description("The name of the Apache Iceberg catalog")StringgetCatalogName();voidsetCatalogName(Stringvalue);}// Write JSON data to Apache Iceberg, using dynamic destinations to determine the Iceberg table// where Dataflow writes each record. The JSON data contains a field named "airport". The// Dataflow pipeline writes to Iceberg tables with the naming pattern "flights-{airport}".publicstaticvoidmain(String[]args){// Parse the pipeline options passed into the application. Example:// --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \// For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-optionsOptionsoptions=PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);Pipelinepipeline=Pipeline.create(options);// Configure the Iceberg source I/OMapcatalogConfig=ImmutableMap.<String,Object>builder().put("warehouse",options.getWarehouseLocation()).put("type","hadoop").build();ImmutableMap<String,Object>config=ImmutableMap.<String,Object>builder().put("catalog_name",options.getCatalogName()).put("catalog_properties",catalogConfig)// Route the incoming records based on the value of the "airport" field..put("table","flights-{airport}")// Specify which fields to keep from the input data..put("keep",Arrays.asList("name","id")).build();// Build the pipeline.pipeline// Read in-memory JSON data..apply(Create.of(TABLE_ROWS))// Convert the JSON records to Row objects..apply(JsonToRow.withSchema(SCHEMA))// Write each Row to Apache Iceberg..apply(Managed.write(Managed.ICEBERG).withConfig(config));// Run the pipeline.pipeline.run().waitUntilFinish();}}
[[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Hard to understand","hardToUnderstand","thumb-down"],["Incorrect information or sample code","incorrectInformationOrSampleCode","thumb-down"],["Missing the information/samples I need","missingTheInformationSamplesINeed","thumb-down"],["Other","otherDown","thumb-down"]],["Last updated 2025-08-21 UTC."],[[["\u003cp\u003eManaged I/O connector enables writing from Dataflow to Apache Iceberg, supporting batch and streaming writes, as well as dynamic destinations and dynamic table creation.\u003c/p\u003e\n"],["\u003cp\u003eSupported catalogs for Apache Iceberg include Hadoop, Hive, REST-based catalogs, and BigQuery metastore, with specific dependencies or version requirements for some.\u003c/p\u003e\n"],["\u003cp\u003eThe \u003ccode\u003eBigQueryIO\u003c/code\u003e connector with BigQuery Storage API should be used for BigQuery tables for Apache Iceberg, noting that these tables must already exist, as dynamic table creation is not supported.\u003c/p\u003e\n"],["\u003cp\u003eConfiguration for Managed I/O to write to Apache Iceberg involves specifying parameters like \u003ccode\u003etable\u003c/code\u003e, \u003ccode\u003ecatalog_name\u003c/code\u003e, \u003ccode\u003ecatalog_properties\u003c/code\u003e, \u003ccode\u003econfig_properties\u003c/code\u003e, and \u003ccode\u003etriggering_frequency_seconds\u003c/code\u003e.\u003c/p\u003e\n"],["\u003cp\u003eManaged I/O for Apache Iceberg supports writing to multiple tables dynamically based on field values within incoming records, demonstrated through examples that showcase both standard and dynamic table writing processes.\u003c/p\u003e\n"]]],[],null,[]]