# Read from Apache Iceberg to Dataflow

To read from Apache Iceberg to Dataflow, use the
[managed I/O connector](/dataflow/docs/guides/managed-io-iceberg).

Managed I/O supports the following capabilities for Apache Iceberg:

- Catalogs: Hadoop, Hive, REST-based catalogs, and BigQuery metastore
- Batch reads and batch writes
- Streaming writes
- Dynamic destinations and dynamic table creation

For [BigQuery tables for Apache Iceberg](/bigquery/docs/iceberg-tables),
use the
[`BigQueryIO` connector](https://beam.apache.org/documentation/io/built-in/google-bigquery/)
with the BigQuery Storage API. The table must already exist; dynamic table
creation is not supported.
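As a minimal illustration (not part of the original sample), a
BigQuery Storage API read with the `BigQueryIO` connector might look like
the following sketch. It assumes an existing `Pipeline` named `pipeline`,
the `beam-sdks-java-io-google-cloud-platform` dependency, and a placeholder
table reference:

    import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;

    // Sketch only: read an existing BigQuery table for Apache Iceberg with
    // the BigQuery Storage API (DIRECT_READ). The table reference is a
    // placeholder; replace it with your own project, dataset, and table.
    pipeline.apply(
        BigQueryIO.readTableRows()
            .from("my-project:my_dataset.my_table")
            .withMethod(BigQueryIO.TypedRead.Method.DIRECT_READ));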
Dependencies
------------

Add the following dependencies to your project:

### Java

    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-managed</artifactId>
      <version>${beam.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-io-iceberg</artifactId>
      <version>${beam.version}</version>
    </dependency>

Example
-------

The following example reads from an Apache Iceberg table and writes the
data to text files. The pipeline expects the table to have an `id` column
of type `int64` and a `name` column of type `string`.

### Java

To authenticate to Dataflow, set up Application Default Credentials.
For more information, see
[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).

    import com.google.common.collect.ImmutableMap;
    import java.util.Map;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.io.TextIO;
    import org.apache.beam.sdk.managed.Managed;
    import org.apache.beam.sdk.options.Description;
    import org.apache.beam.sdk.options.PipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.transforms.MapElements;
    import org.apache.beam.sdk.values.TypeDescriptors;

    public class ApacheIcebergRead {

      static final String CATALOG_TYPE = "hadoop";

      public interface Options extends PipelineOptions {
        @Description("The URI of the Apache Iceberg warehouse location")
        String getWarehouseLocation();

        void setWarehouseLocation(String value);

        @Description("Path to write the output file")
        String getOutputPath();

        void setOutputPath(String value);

        @Description("The name of the Apache Iceberg catalog")
        String getCatalogName();

        void setCatalogName(String value);

        @Description("The name of the table to read from")
        String getTableName();

        void setTableName(String value);
      }

      public static void main(String[] args) {

        // Parse the pipeline options passed into the application. Example:
        //   --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \
        //   --tableName=$TABLE_NAME --outputPath=$OUTPUT_FILE
        // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options
        Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
        Pipeline pipeline = Pipeline.create(options);

        // Configure the Iceberg catalog properties for the source.
        Map<String, Object> catalogConfig = ImmutableMap.<String, Object>builder()
            .put("warehouse", options.getWarehouseLocation())
            .put("type", CATALOG_TYPE)
            .build();

        ImmutableMap<String, Object> config = ImmutableMap.<String, Object>builder()
            .put("table", options.getTableName())
            .put("catalog_name", options.getCatalogName())
            .put("catalog_properties", catalogConfig)
            .build();

        // Build the pipeline.
        pipeline.apply(Managed.read(Managed.ICEBERG).withConfig(config))
            .getSinglePCollection()
            // Format each record as a string with the format 'id:name'.
            .apply(MapElements
                .into(TypeDescriptors.strings())
                .via(row -> String.format("%d:%s",
                    row.getInt64("id"),
                    row.getString("name"))))
            // Write to a text file.
            .apply(
                TextIO.write()
                    .to(options.getOutputPath())
                    .withNumShards(1)
                    .withSuffix(".txt"));

        pipeline.run().waitUntilFinish();
      }
    }

What's next
-----------

- [Write to Apache Iceberg](/dataflow/docs/guides/write-to-iceberg).
- Learn more about [Managed I/O](/dataflow/docs/guides/managed-io).