Note: Python 2.7 reached end of support on January 31, 2024. Your existing Python 2.7 applications will continue to run and receive traffic. However, App Engine might block re-deployment of applications that use runtimes after their end of support date. We recommend that you migrate to the latest supported version of Python.
Source code for google.appengine.ext.bulkload.csv_connector

#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Bulkloader CSV reading and writing.

Handle the CSV format specified in a bulkloader.yaml file.
"""

import codecs
import cStringIO
import csv
import encodings
import encodings.ascii
import encodings.cp1252
import encodings.latin_1
import encodings.utf_8

from google.appengine.ext.bulkload import bulkloader_errors
from google.appengine.ext.bulkload import connector_interface


def utf8_recoder(stream, encoding):
  """Yield each line of an encoded stream, re-encoded as UTF-8.

  Args:
    stream: Byte stream to read from.
    encoding: Name of the codec the stream's contents are encoded in.

  Yields:
    One UTF-8 encoded byte string per line read from the stream.
  """
  # Wrap the raw stream in the codec's StreamReader so that iterating it
  # produces decoded (unicode) lines.
  decoded_lines = codecs.getreader(encoding)(stream)
  for decoded_line in decoded_lines:
    yield decoded_line.encode('utf-8')
class UnicodeDictWriter(object):
  """csv.DictWriter wrapper that writes rows in a chosen encoding.

  Based on UnicodeWriter in http://docs.python.org/library/csv.html.
  """

  def __init__(self, stream, fieldnames, encoding='utf-8', **kwds):
    """Initializer.

    Args:
      stream: Stream to write to.
      fieldnames: Fieldnames to pass to the DictWriter.
      encoding: Desired encoding.
      kwds: Additional arguments to pass to the DictWriter.
    """
    stream_writer = codecs.getwriter(encoding)
    # For these codecs each cell is encoded straight to the target encoding
    # and the csv module writes to the output stream directly.
    # NOTE(review): presumably safe because they are ASCII-compatible, so the
    # csv delimiters and quotes are unaffected -- confirm.
    direct_writers = (
        encodings.utf_8.StreamWriter,
        encodings.ascii.StreamWriter,
        encodings.latin_1.StreamWriter,
        encodings.cp1252.StreamWriter,
    )
    self.no_recoding = any(stream_writer is known for known in direct_writers)
    if self.no_recoding:
      self.encoder = codecs.getencoder(encoding)
      self.writer = csv.DictWriter(stream, fieldnames, **kwds)
    else:
      # Any other codec: build the row as UTF-8 in an in-memory queue, then
      # push it through the target codec's StreamWriter in writerow().
      self.encoder = codecs.getencoder('utf-8')
      self.queue = cStringIO.StringIO()
      self.writer = csv.DictWriter(self.queue, fieldnames, **kwds)
      self.stream = stream_writer(stream)

  def writerow(self, row):
    """Encode every value of `row` and write it as one CSV line.

    Args:
      row: Dictionary mapping field names to values accepted by the encoder.
    """
    encoded_row = {}
    for field, value in row.iteritems():
      # The encoder returns (encoded_bytes, length_consumed); keep the bytes.
      encoded_row[field] = self.encoder(value)[0]
    self.writer.writerow(encoded_row)

    if not self.no_recoding:
      # The csv module wrote UTF-8 into the queue: decode it, let the stream
      # writer re-encode it to the target encoding, then empty the queue.
      utf8_line = self.queue.getvalue()
      self.stream.write(utf8_line.decode('utf-8'))
      self.queue.truncate(0)
class CsvConnector(connector_interface.ConnectorInterface):
  """Read/write a (possibly encoded) CSV file."""

  @classmethod
  def create_from_options(cls, options, name):
    """Factory using an options dictionary.

    Args:
      options: Dictionary of options:
        columns: 'from_header' or blank.
        column_list: overrides columns specifically.
        encoding: encoding of the file. e.g. 'utf-8' (default), 'windows-1252'.
        skip_import_header_row: True to ignore the header line on import.
          Defaults False, except must be True if columns=from_header.
        print_export_header_row: True to print a header line on export.
          Defaults to False except if columns=from_header.
        import_options: Other kwargs to pass in, like "dialect".
        export_options: Other kwargs to pass in, like "dialect".
      name: The name of this transformer, for use in error messages.

    Returns:
      CsvConnector object described by the specified options.

    Raises:
      InvalidConfiguration: If the config is invalid.
    """
    column_list = options.get('column_list', None)
    columns = None
    if not column_list:
      # Without an explicit column list the only supported source of column
      # names is the header row of the file.
      columns = options.get('columns', 'from_header')
      if columns != 'from_header':
        raise bulkloader_errors.InvalidConfiguration(
            'CSV columns must be "from_header", or a column_list '
            'must be specified. (In transformer name %s.)' % name)
    csv_encoding = options.get('encoding', 'utf-8')

    from_header = (columns == 'from_header')
    skip_import_header_row = options.get('skip_import_header_row', from_header)
    if from_header and not skip_import_header_row:
      raise bulkloader_errors.InvalidConfiguration(
          'When CSV columns are "from_header", the header row must always '
          'be skipped. (In transformer name %s.)' % name)
    print_export_header_row = options.get('print_export_header_row',
                                          from_header)
    import_options = options.get('import_options', {})
    export_options = options.get('export_options', {})
    return cls(columns, column_list, skip_import_header_row,
               print_export_header_row, csv_encoding, import_options,
               export_options)

  def __init__(self, columns, column_list, skip_import_header_row,
               print_export_header_row, csv_encoding=None,
               import_options=None, export_options=None):
    """Initializer.

    Args:
      columns: 'from_header' or blank
      column_list: overrides columns specifically.
      skip_import_header_row: True to ignore the header line on import.
        Defaults False, except must be True if columns=from_header.
      print_export_header_row: True to print a header line on export.
        Defaults to False except if columns=from_header.
      csv_encoding: encoding of the file.
      import_options: Other kwargs to pass in, like "dialect".
      export_options: Other kwargs to pass in, like "dialect".
    """
    self.columns = columns
    self.from_header = (columns == 'from_header')
    self.column_list = column_list
    self.skip_import_header_row = skip_import_header_row
    self.print_export_header_row = print_export_header_row
    self.csv_encoding = csv_encoding
    # Import/export state, populated by generate_import_record,
    # initialize_export and the first write_dict call.
    self.dict_generator = None
    self.output_stream = None
    self.csv_writer = None
    self.bulkload_state = None
    self.import_options = import_options or {}
    self.export_options = export_options or {}

  def generate_import_record(self, filename, bulkload_state):
    """Generator, yields dicts for nodes found as described in the options.

    Args:
      filename: Filename to read.
      bulkload_state: Passed bulkload_state.

    Yields:
      Neutral dict, one per row in the CSV file.

    Raises:
      InvalidImportData: If a row has more values than there are headers.
    """
    self.bulkload_state = bulkload_state
    # The context manager closes the file when the generator is exhausted,
    # closed, or garbage collected; previously the handle was never closed.
    with open(filename) as raw_stream:
      recoded_stream = utf8_recoder(raw_stream, self.csv_encoding)

      self.dict_generator = csv.DictReader(recoded_stream, self.column_list,
                                           **self.import_options)

      # With an explicit column list the header row (if any) comes back as an
      # ordinary row and has to be dropped here. When the columns come from
      # the header, csv.DictReader is expected to consume that row itself.
      discard_line = self.skip_import_header_row and not self.from_header

      # line_number counts rows returned by the DictReader (including a
      # discarded header row), starting at 1.
      for line_number, input_dict in enumerate(self.dict_generator, 1):
        if discard_line:
          discard_line = False
          continue

        decoded_dict = {}
        for key, value in input_dict.iteritems():
          # csv.DictReader files surplus values under its restkey, which is
          # None unless import_options overrides it.
          if key is None:
            raise bulkloader_errors.InvalidImportData(
                'Got more values in row than headers on line %d.'
                % (line_number))
          if not self.column_list:
            # Keys read from the file's header are UTF-8 byte strings.
            key = unicode(key, 'utf-8')
          if value:
            value = unicode(value, 'utf-8')
          decoded_dict[key] = value
        yield decoded_dict

  def initialize_export(self, filename, bulkload_state):
    """Initialize the output file.

    Args:
      filename: Filename to write.
      bulkload_state: Passed bulkload_state.
    """
    self.bulkload_state = bulkload_state

    self.output_stream = open(filename, 'wb')
    # Drop any writer left over from a previous export so the next write_dict
    # builds one bound to the stream opened above, not to a closed file.
    self.csv_writer = None

  def __initialize_csv_writer(self, dictionary):
    """Actual initialization, happens on the first entity being written.

    Args:
      dictionary: First record being exported; when the columns come from the
        header its keys become the export column list.
    """
    write_header = self.print_export_header_row
    if self.from_header:
      export_column_list = tuple(dictionary)
    else:
      export_column_list = self.column_list
    self.csv_writer = UnicodeDictWriter(self.output_stream,
                                        export_column_list,
                                        self.csv_encoding,
                                        **self.export_options)
    if write_header:
      # The header line is a row that maps every column name to itself.
      self.csv_writer.writerow(dict(zip(export_column_list,
                                        export_column_list)))

  def write_dict(self, dictionary):
    """Write one record for the specified entity.

    Args:
      dictionary: Mapping of column name to value for the row to write.
    """
    if not self.csv_writer:
      # Created lazily because the column list may depend on the first record.
      self.__initialize_csv_writer(dictionary)
    self.csv_writer.writerow(dictionary)

  def finalize_export(self):
    """Close the output file opened by initialize_export."""
    self.output_stream.close()