Managed I/O per Apache Iceberg supporta le destinazioni dinamiche. Anziché
scrivere in una singola tabella fissa, il connettore può selezionare dinamicamente una
tabella di destinazione in base ai valori dei campi all'interno dei record in entrata.
Per utilizzare le destinazioni dinamiche, fornisci un modello per il parametro di configurazione `table`. Per ulteriori informazioni, consulta Destinazioni dinamiche.
Esempi
Gli esempi seguenti mostrano come utilizzare Managed I/O per scrivere in
Apache Iceberg.
Scrivere in una tabella Apache Iceberg
Il seguente esempio scrive dati JSON in memoria in una tabella Apache Iceberg.
importcom.google.common.collect.ImmutableMap;importjava.util.Arrays;importjava.util.List;importjava.util.Map;importorg.apache.beam.sdk.Pipeline;importorg.apache.beam.sdk.managed.Managed;importorg.apache.beam.sdk.options.Description;importorg.apache.beam.sdk.options.PipelineOptions;importorg.apache.beam.sdk.options.PipelineOptionsFactory;importorg.apache.beam.sdk.schemas.Schema;importorg.apache.beam.sdk.transforms.Create;importorg.apache.beam.sdk.transforms.JsonToRow;importorg.apache.beam.sdk.values.PCollectionRowTuple;publicclassApacheIcebergWrite{staticfinalList<String>TABLE_ROWS=Arrays.asList("{\"id\":0, \"name\":\"Alice\"}","{\"id\":1, \"name\":\"Bob\"}","{\"id\":2, \"name\":\"Charles\"}");staticfinalStringCATALOG_TYPE="hadoop";// The schema for the table rows.publicstaticfinalSchemaSCHEMA=newSchema.Builder().addStringField("name").addInt64Field("id").build();publicinterfaceOptionsextendsPipelineOptions{@Description("The URI of the Apache Iceberg warehouse location")StringgetWarehouseLocation();voidsetWarehouseLocation(Stringvalue);@Description("The name of the Apache Iceberg catalog")StringgetCatalogName();voidsetCatalogName(Stringvalue);@Description("The name of the table to write to")StringgetTableName();voidsetTableName(Stringvalue);}publicstaticvoidmain(String[]args){// Parse the pipeline options passed into the application. 
Example:// --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \// --tableName= $TABLE_NAME// For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-optionsOptionsoptions=PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);Pipelinepipeline=Pipeline.create(options);// Configure the Iceberg source I/OMapcatalogConfig=ImmutableMap.<String,Object>builder().put("warehouse",options.getWarehouseLocation()).put("type",CATALOG_TYPE).build();ImmutableMap<String,Object>config=ImmutableMap.<String,Object>builder().put("table",options.getTableName()).put("catalog_name",options.getCatalogName()).put("catalog_properties",catalogConfig).build();// Build the pipeline.pipeline.apply(Create.of(TABLE_ROWS)).apply(JsonToRow.withSchema(SCHEMA)).apply(Managed.write(Managed.ICEBERG).withConfig(config));pipeline.run().waitUntilFinish();}}
Scrivere con destinazioni dinamiche
L'esempio seguente scrive in tabelle Apache Iceberg diverse in base a un campo nei dati di input.
importcom.google.common.collect.ImmutableMap;importjava.util.Arrays;importjava.util.List;importjava.util.Map;importorg.apache.beam.sdk.Pipeline;importorg.apache.beam.sdk.PipelineResult;importorg.apache.beam.sdk.managed.Managed;importorg.apache.beam.sdk.options.Description;importorg.apache.beam.sdk.options.PipelineOptions;importorg.apache.beam.sdk.options.PipelineOptionsFactory;importorg.apache.beam.sdk.schemas.Schema;importorg.apache.beam.sdk.transforms.Create;importorg.apache.beam.sdk.transforms.JsonToRow;publicclassApacheIcebergDynamicDestinations{// The schema for the table rows.publicstaticfinalSchemaSCHEMA=newSchema.Builder().addInt64Field("id").addStringField("name").addStringField("airport").build();// The data to write to table, formatted as JSON strings.staticfinalList<String>TABLE_ROWS=List.of("{\"id\":0, \"name\":\"Alice\", \"airport\": \"ORD\" }","{\"id\":1, \"name\":\"Bob\", \"airport\": \"SYD\" }","{\"id\":2, \"name\":\"Charles\", \"airport\": \"ORD\" }");publicinterfaceOptionsextendsPipelineOptions{@Description("The URI of the Apache Iceberg warehouse location")StringgetWarehouseLocation();voidsetWarehouseLocation(Stringvalue);@Description("The name of the Apache Iceberg catalog")StringgetCatalogName();voidsetCatalogName(Stringvalue);}// Write JSON data to Apache Iceberg, using dynamic destinations to determine the Iceberg table// where Dataflow writes each record. The JSON data contains a field named "airport". The// Dataflow pipeline writes to Iceberg tables with the naming pattern "flights-{airport}".publicstaticvoidmain(String[]args){// Parse the pipeline options passed into the application. 
Example:// --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \// For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-optionsOptionsoptions=PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);Pipelinepipeline=Pipeline.create(options);// Configure the Iceberg source I/OMapcatalogConfig=ImmutableMap.<String,Object>builder().put("warehouse",options.getWarehouseLocation()).put("type","hadoop").build();ImmutableMap<String,Object>config=ImmutableMap.<String,Object>builder().put("catalog_name",options.getCatalogName()).put("catalog_properties",catalogConfig)// Route the incoming records based on the value of the "airport" field..put("table","flights-{airport}")// Specify which fields to keep from the input data..put("keep",Arrays.asList("name","id")).build();// Build the pipeline.pipeline// Read in-memory JSON data..apply(Create.of(TABLE_ROWS))// Convert the JSON records to Row objects..apply(JsonToRow.withSchema(SCHEMA))// Write each Row to Apache Iceberg..apply(Managed.write(Managed.ICEBERG).withConfig(config));// Run the pipeline.pipeline.run().waitUntilFinish();}}
[[["Facile da capire","easyToUnderstand","thumb-up"],["Il problema è stato risolto","solvedMyProblem","thumb-up"],["Altra","otherUp","thumb-up"]],[["Difficile da capire","hardToUnderstand","thumb-down"],["Informazioni o codice di esempio errati","incorrectInformationOrSampleCode","thumb-down"],["Mancano le informazioni o gli esempi di cui ho bisogno","missingTheInformationSamplesINeed","thumb-down"],["Problema di traduzione","translationIssue","thumb-down"],["Altra","otherDown","thumb-down"]],["Ultimo aggiornamento 2025-09-04 UTC."],[[["\u003cp\u003eManaged I/O connector enables writing from Dataflow to Apache Iceberg, supporting batch and streaming writes, as well as dynamic destinations and dynamic table creation.\u003c/p\u003e\n"],["\u003cp\u003eSupported catalogs for Apache Iceberg include Hadoop, Hive, REST-based catalogs, and BigQuery metastore, with specific dependencies or version requirements for some.\u003c/p\u003e\n"],["\u003cp\u003eThe \u003ccode\u003eBigQueryIO\u003c/code\u003e connector with BigQuery Storage API should be used for BigQuery tables for Apache Iceberg, noting that these tables must already exist, as dynamic table creation is not supported.\u003c/p\u003e\n"],["\u003cp\u003eConfiguration for Managed I/O to write to Apache Iceberg involves specifying parameters like \u003ccode\u003etable\u003c/code\u003e, \u003ccode\u003ecatalog_name\u003c/code\u003e, \u003ccode\u003ecatalog_properties\u003c/code\u003e, \u003ccode\u003econfig_properties\u003c/code\u003e, and \u003ccode\u003etriggering_frequency_seconds\u003c/code\u003e.\u003c/p\u003e\n"],["\u003cp\u003eManaged I/O for Apache Iceberg supports writing to multiple tables dynamically based on field values within incoming records, demonstrated through examples that showcase both standard and dynamic table writing processes.\u003c/p\u003e\n"]]],[],null,["# Write from Dataflow to Apache Iceberg\n\nTo write from Dataflow to Apache Iceberg, use the\n[managed I/O 
connector](/dataflow/docs/guides/managed-io-iceberg).\n\nManaged I/O supports the following capabilities for Apache Iceberg:\n\nFor [BigQuery tables for Apache Iceberg](/bigquery/docs/iceberg-tables),\nuse the\n[`BigQueryIO` connector](https://beam.apache.org/documentation/io/built-in/google-bigquery/)\nwith BigQuery Storage API. The table must already exist; dynamic table creation is\nnot supported.\n\nDependencies\n------------\n\nAdd the following dependencies to your project: \n\n### Java\n\n \u003cdependency\u003e\n \u003cgroupId\u003eorg.apache.beam\u003c/groupId\u003e\n \u003cartifactId\u003ebeam-sdks-java-managed\u003c/artifactId\u003e\n \u003cversion\u003e${beam.version}\u003c/version\u003e\n \u003c/dependency\u003e\n\n \u003cdependency\u003e\n \u003cgroupId\u003eorg.apache.beam\u003c/groupId\u003e\n \u003cartifactId\u003ebeam-sdks-java-io-iceberg\u003c/artifactId\u003e\n \u003cversion\u003e${beam.version}\u003c/version\u003e\n \u003c/dependency\u003e\n\nDynamic destinations\n--------------------\n\nManaged I/O for Apache Iceberg supports dynamic destinations. Instead of\nwriting to a single fixed table, the connector can dynamically select a\ndestination table based on field values within the incoming records.\n\nTo use dynamic destinations, provide a template for the `table` configuration\nparameter. For more information, see\n[Dynamic destinations](/dataflow/docs/guides/managed-io#dynamic-destinations).\n\nExamples\n--------\n\nThe following examples show how to use Managed I/O to write to\nApache Iceberg.\n\n### Write to an Apache Iceberg table\n\nThe following example writes in-memory JSON data to an Apache Iceberg table. 
\n\n### Java\n\n\nTo authenticate to Dataflow, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import com.google.common.collect.ImmutableMap;\n import java.util.Arrays;\n import java.util.List;\n import java.util.Map;\n import org.apache.beam.sdk.Pipeline;\n import org.apache.beam.sdk.managed.Managed;\n import org.apache.beam.sdk.options.Description;\n import org.apache.beam.sdk.options.PipelineOptions;\n import org.apache.beam.sdk.options.PipelineOptionsFactory;\n import org.apache.beam.sdk.schemas.Schema;\n import org.apache.beam.sdk.transforms.Create;\n import org.apache.beam.sdk.transforms.JsonToRow;\n import org.apache.beam.sdk.values.PCollectionRowTuple;\n\n public class ApacheIcebergWrite {\n static final List\u003cString\u003e TABLE_ROWS = Arrays.asList(\n \"{\\\"id\\\":0, \\\"name\\\":\\\"Alice\\\"}\",\n \"{\\\"id\\\":1, \\\"name\\\":\\\"Bob\\\"}\",\n \"{\\\"id\\\":2, \\\"name\\\":\\\"Charles\\\"}\"\n );\n\n static final String CATALOG_TYPE = \"hadoop\";\n\n // The schema for the table rows.\n public static final Schema SCHEMA = new Schema.Builder()\n .addStringField(\"name\")\n .addInt64Field(\"id\")\n .build();\n\n public interface Options extends PipelineOptions {\n @Description(\"The URI of the Apache Iceberg warehouse location\")\n String getWarehouseLocation();\n\n void setWarehouseLocation(String value);\n\n @Description(\"The name of the Apache Iceberg catalog\")\n String getCatalogName();\n\n void setCatalogName(String value);\n\n @Description(\"The name of the table to write to\")\n String getTableName();\n\n void setTableName(String value);\n }\n\n public static void main(String[] args) {\n\n // Parse the pipeline options passed into the application. 
Example:\n // --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \\\n // --tableName= $TABLE_NAME\n // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options\n Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);\n Pipeline pipeline = Pipeline.create(options);\n\n // Configure the Iceberg source I/O\n Map catalogConfig = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"warehouse\", options.getWarehouseLocation())\n .put(\"type\", CATALOG_TYPE)\n .build();\n\n ImmutableMap\u003cString, Object\u003e config = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"table\", options.getTableName())\n .put(\"catalog_name\", options.getCatalogName())\n .put(\"catalog_properties\", catalogConfig)\n .build();\n\n // Build the pipeline.\n pipeline.apply(Create.of(TABLE_ROWS))\n .apply(JsonToRow.withSchema(SCHEMA))\n .apply(Managed.write(Managed.ICEBERG).withConfig(config));\n\n pipeline.run().waitUntilFinish();\n }\n }\n\n\u003cbr /\u003e\n\n### Write with dynamic destinations\n\nThe following example writes to different Apache Iceberg tables based on a\nfield in the input data. 
\n\n### Java\n\n\nTo authenticate to Dataflow, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import com.google.common.collect.ImmutableMap;\n import java.util.Arrays;\n import java.util.List;\n import java.util.Map;\n import org.apache.beam.sdk.Pipeline;\n import org.apache.beam.sdk.PipelineResult;\n import org.apache.beam.sdk.managed.Managed;\n import org.apache.beam.sdk.options.Description;\n import org.apache.beam.sdk.options.PipelineOptions;\n import org.apache.beam.sdk.options.PipelineOptionsFactory;\n import org.apache.beam.sdk.schemas.Schema;\n import org.apache.beam.sdk.transforms.Create;\n import org.apache.beam.sdk.transforms.JsonToRow;\n\n public class ApacheIcebergDynamicDestinations {\n\n // The schema for the table rows.\n public static final Schema SCHEMA = new Schema.Builder()\n .addInt64Field(\"id\")\n .addStringField(\"name\")\n .addStringField(\"airport\")\n .build();\n\n // The data to write to table, formatted as JSON strings.\n static final List\u003cString\u003e TABLE_ROWS = List.of(\n \"{\\\"id\\\":0, \\\"name\\\":\\\"Alice\\\", \\\"airport\\\": \\\"ORD\\\" }\",\n \"{\\\"id\\\":1, \\\"name\\\":\\\"Bob\\\", \\\"airport\\\": \\\"SYD\\\" }\",\n \"{\\\"id\\\":2, \\\"name\\\":\\\"Charles\\\", \\\"airport\\\": \\\"ORD\\\" }\"\n );\n\n public interface Options extends PipelineOptions {\n @Description(\"The URI of the Apache Iceberg warehouse location\")\n String getWarehouseLocation();\n\n void setWarehouseLocation(String value);\n\n @Description(\"The name of the Apache Iceberg catalog\")\n String getCatalogName();\n\n void setCatalogName(String value);\n }\n\n // Write JSON data to Apache Iceberg, using dynamic destinations to determine the Iceberg table\n // where Dataflow writes each record. The JSON data contains a field named \"airport\". 
The\n // Dataflow pipeline writes to Iceberg tables with the naming pattern \"flights-{airport}\".\n public static void main(String[] args) {\n // Parse the pipeline options passed into the application. Example:\n // --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \\\n // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options\n Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);\n Pipeline pipeline = Pipeline.create(options);\n\n // Configure the Iceberg source I/O\n Map catalogConfig = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"warehouse\", options.getWarehouseLocation())\n .put(\"type\", \"hadoop\")\n .build();\n\n ImmutableMap\u003cString, Object\u003e config = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"catalog_name\", options.getCatalogName())\n .put(\"catalog_properties\", catalogConfig)\n // Route the incoming records based on the value of the \"airport\" field.\n .put(\"table\", \"flights-{airport}\")\n // Specify which fields to keep from the input data.\n .put(\"keep\", Arrays.asList(\"name\", \"id\"))\n .build();\n\n // Build the pipeline.\n pipeline\n // Read in-memory JSON data.\n .apply(Create.of(TABLE_ROWS))\n // Convert the JSON records to Row objects.\n .apply(JsonToRow.withSchema(SCHEMA))\n // Write each Row to Apache Iceberg.\n .apply(Managed.write(Managed.ICEBERG).withConfig(config));\n\n // Run the pipeline.\n pipeline.run().waitUntilFinish();\n }\n }\n\n\u003cbr /\u003e\n\nWhat's next\n-----------\n\n- [Read from Apache Iceberg](/dataflow/docs/guides/read-from-iceberg).\n- Learn more about [Managed I/O](/dataflow/docs/guides/managed-io)."]]