python source code of csvimport

sqlobject-master
- run_with_env.cmd
- LICENSE
- appveyor.yml
- debian
  - examples
  - changelog
  - rules
  - copyright
  - docs
  - control
- ANNOUNCE.rst
- README.rst
- setup.py
- sqlobject
  - classregistry.py
  - inheritance
    - iteration.py
    - __init__.py
    - tests
      - test_inheritance_tree.py
      - test_deep_inheritance.py
      - test_foreignKey.py
      - test_destroy_cascade.py
      - test_asdict.py
      - test_inheritance.py
      - __init__.py
      - test_aggregates.py
      - test_indexes.py
  - styles.py
  - sresults.py
  - cache.py
  - index.py
  - constraints.py
  - sybase
    - sybaseconnection.py
    - __init__.py
  - include
    - __init__.py
    - tests
      - test_hashcol.py
      - __init__.py
    - hashcol.py
  - .coveragerc
  - __version__.py
  - declarative.py
  - dbconnection.py
  - mssql
    - mssqlconnection.py
    - __init__.py
  - wsgi_middleware.py
  - sqlite
    - sqliteconnection.py
    - __init__.py
  - joins.py
  - events.py
  - maxdb
    - readme.txt
    - maxdbconnection.py
    - __init__.py
  - util
    - csvimport.py
    - moduleloader.py
    - csvexport.py
    - threadinglocal.py
    - __init__.py
  - mysql
    - mysqlconnection.py
    - __init__.py
  - views.py
  - boundattributes.py
  - postgres
    - __init__.py
    - pgconnection.py
  - col.py
  - converters.py
  - dberrors.py
  - versioning
    - test
      - test_version.py
      - __init__.py
    - __init__.py
  - __init__.py
  - main.py
  - tests
    - test_paste.py
    - test_default_style.py
    - test_select.py
    - test_enum.py
    - test_jsoncol.py
    - test_perConnection.py
    - test_basic.py
    - test_joins_conditional.py
    - test_cache.py
    - test_cyclic_reference.py
    - test_converters.py
    - test_exceptions.py
    - test_combining_joins.py
    - test_SingleJoin.py
    - test_postgres.py
    - test_pickle.py
    - test_string_id.py
    - test_declarative.py
    - test_slice.py
    - dbtest.py
    - test_csvexport.py
    - test_select_through.py
    - test_decimal.py
    - test_picklecol.py
    - test_sqlbuilder_joins_instances.py
    - test_ForeignKey.py
    - test_compat.py
    - test_events.py
    - test_ForeignKey_cascade.py
    - test_sqlbuilder_importproxy.py
    - test_comparison.py
    - test_unicode.py
    - test_class_hash.py
    - test_schema.py
    - test_md5.py
    - test_setters.py
    - test_style.py
    - test_joins.py
    - test_constraints.py
    - test_groupBy.py
    - test_sorting.py
    - test_asdict.py
    - test_expire.py
    - test_empty.py
    - test_parse_uri.py
    - test_blob.py
    - test_lazy.py
    - test_subqueries.py
    - test_distinct.py
    - test_NoneValuedResultItem.py
    - test_create_drop.py
    - test_SQLMultipleJoin.py
    - test_auto.py
    - test_inheritance.py
    - test_sqlobject_admin.py
    - __init__.py
    - test_columns_order.py
    - test_sqlbuilder_dbspecific.py
    - test_mysql.py
    - test_new_joins.py
    - test_SQLRelatedJoin.py
    - test_uuidcol.py
    - test_transactions.py
    - test_jsonbcol.py
    - test_datetime.py
    - test_csvimport.py
    - test_validation.py
    - test_delete.py
    - test_sqlite.py
    - test_sqlmeta_idName.py
    - test_boundattributes.py
    - test_identity.py
    - test_aliases.py
    - test_complex_sorting.py
    - test_aggregates.py
    - test_views.py
    - test_sqlbuilder.py
    - test_indexes.py
    - test_reparent_sqlmeta.py
  - manager
    - __init__.py
    - command.py
  - firebird
    - firebirdconnection.py
    - __init__.py
  - compat.py
  - sqlbuilder.py
  - .gitignore
  - conftest.py
- setup.cfg
- .travis.yml
- scripts
  - sqlobject-admin
  - sqlobject-convertOldURI
- .gitignore
- docs
  - SelectResults.rst
  - download.rst
  - SQLBuilder.rst
  - Authors.rst
  - Makefile
  - rebuild
  - presentation-2004-11
    - ui
      - pretty.css
      - slides.css
      - slides.js
      - bodybg.gif
      - s5-core.css
      - opera.css
      - framing.css
      - print.css
      - custom.css
    - sqlobject-and-database-programming.html
  - TODO.rst
  - sqlobject-admin.rst
  - News5.rst
  - Python3.rst
  - News1.rst
  - DeveloperGuide.rst
  - api
    - sqlobject.tests.test_sqlobject_admin.rst
    - sqlobject.tests.test_compat.rst
    - sqlobject.firebird.rst
    - sqlobject.tests.test_picklecol.rst
    - sqlobject.include.hashcol.rst
    - sqlobject.tests.test_select_through.rst
    - sqlobject.dbconnection.rst
    - sqlobject.rst
    - sqlobject.tests.test_setters.rst
    - sqlobject.tests.test_sqlbuilder_dbspecific.rst
    - sqlobject.tests.test_reparent_sqlmeta.rst
    - sqlobject.tests.test_uuidcol.rst
    - sqlobject.tests.test_style.rst
    - sqlobject.tests.test_constraints.rst
    - sqlobject.versioning.test.rst
    - sqlobject.tests.test_jsoncol.rst
    - sqlobject.inheritance.tests.test_destroy_cascade.rst
    - sqlobject.sqlite.sqliteconnection.rst
    - sqlobject.tests.test_joins_conditional.rst
    - sqlobject.tests.test_boundattributes.rst
    - sqlobject.util.csvexport.rst
    - sqlobject.joins.rst
    - sqlobject.tests.test_combining_joins.rst
    - sqlobject.tests.test_csvimport.rst
    - sqlobject.tests.test_columns_order.rst
    - sqlobject.tests.test_asdict.rst
    - sqlobject.inheritance.iteration.rst
    - sqlobject.tests.test_sqlmeta_idName.rst
    - sqlobject.tests.test_distinct.rst
    - sqlobject.tests.test_create_drop.rst
    - sqlobject.tests.test_SingleJoin.rst
    - sqlobject.tests.test_SQLRelatedJoin.rst
    - sqlobject.mysql.mysqlconnection.rst
    - sqlobject.tests.test_aggregates.rst
    - sqlobject.tests.test_slice.rst
    - sqlobject.tests.test_joins.rst
    - sqlobject.tests.test_comparison.rst
    - sqlobject.tests.test_schema.rst
    - sqlobject.versioning.rst
    - sqlobject.tests.test_cache.rst
    - sqlobject.styles.rst
    - sqlobject.tests.test_empty.rst
    - sqlobject.tests.test_SQLMultipleJoin.rst
    - sqlobject.include.rst
    - sqlobject.include.tests.test_hashcol.rst
    - sqlobject.tests.test_pickle.rst
    - sqlobject.conftest.rst
    - sqlobject.col.rst
    - sqlobject.tests.dbtest.rst
    - sqlobject.maxdb.maxdbconnection.rst
    - sqlobject.sqlite.rst
    - sqlobject.tests.test_blob.rst
    - sqlobject.tests.test_events.rst
    - sqlobject.tests.test_identity.rst
    - sqlobject.maxdb.rst
    - sqlobject.mssql.mssqlconnection.rst
    - sqlobject.mysql.rst
    - sqlobject.tests.test_validation.rst
    - sqlobject.main.rst
    - sqlobject.inheritance.tests.test_deep_inheritance.rst
    - sqlobject.tests.test_new_joins.rst
    - sqlobject.tests.test_complex_sorting.rst
    - sqlobject.converters.rst
    - sqlobject.inheritance.tests.rst
    - sqlobject.sybase.sybaseconnection.rst
    - sqlobject.postgres.rst
    - sqlobject.tests.test_exceptions.rst
    - sqlobject.tests.test_default_style.rst
    - sqlobject.tests.test_sqlbuilder.rst
    - sqlobject.classregistry.rst
    - sqlobject.tests.test_csvexport.rst
    - sqlobject.declarative.rst
    - sqlobject.tests.test_sorting.rst
    - sqlobject.firebird.firebirdconnection.rst
    - sqlobject.tests.test_select.rst
    - sqlobject.tests.test_declarative.rst
    - sqlobject.tests.test_parse_uri.rst
    - sqlobject.tests.test_paste.rst
    - sqlobject.inheritance.tests.test_aggregates.rst
    - sqlobject.views.rst
    - sqlobject.sresults.rst
    - sqlobject.util.threadinglocal.rst
    - sqlobject.include.tests.rst
    - sqlobject.wsgi_middleware.rst
    - sqlobject.tests.test_postgres.rst
    - sqlobject.tests.test_inheritance.rst
    - sqlobject.tests.test_converters.rst
    - sqlobject.tests.test_NoneValuedResultItem.rst
    - sqlobject.tests.rst
    - sqlobject.tests.test_subqueries.rst
    - sqlobject.tests.test_enum.rst
    - sqlobject.tests.test_auto.rst
    - sqlobject.tests.test_groupBy.rst
    - sqlobject.tests.test_sqlbuilder_joins_instances.rst
    - sqlobject.tests.test_decimal.rst
    - sqlobject.util.rst
    - sqlobject.tests.test_jsonbcol.rst
    - sqlobject.cache.rst
    - sqlobject.tests.test_perConnection.rst
    - sqlobject.sybase.rst
    - sqlobject.util.csvimport.rst
    - sqlobject.events.rst
    - modules.rst
    - sqlobject.manager.rst
    - sqlobject.tests.test_cyclic_reference.rst
    - sqlobject.tests.test_class_hash.rst
    - sqlobject.tests.test_unicode.rst
    - sqlobject.compat.rst
    - sqlobject.tests.test_delete.rst
    - sqlobject.inheritance.tests.test_inheritance.rst
    - sqlobject.tests.test_md5.rst
    - sqlobject.inheritance.tests.test_inheritance_tree.rst
    - sqlobject.tests.test_mysql.rst
    - sqlobject.tests.test_expire.rst
    - sqlobject.tests.test_aliases.rst
    - sqlobject.inheritance.rst
    - sqlobject.dberrors.rst
    - sqlobject.tests.test_transactions.rst
    - sqlobject.constraints.rst
    - sqlobject.util.moduleloader.rst
    - sqlobject.tests.test_sqlbuilder_importproxy.rst
    - sqlobject.mssql.rst
    - sqlobject.tests.test_ForeignKey.rst
    - sqlobject.sqlbuilder.rst
    - sqlobject.tests.test_indexes.rst
    - sqlobject.tests.test_datetime.rst
    - sqlobject.index.rst
    - sqlobject.tests.test_sqlite.rst
    - sqlobject.inheritance.tests.test_foreignKey.rst
    - sqlobject.tests.test_string_id.rst
    - sqlobject.postgres.pgconnection.rst
    - sqlobject.manager.command.rst
    - sqlobject.inheritance.tests.test_asdict.rst
    - sqlobject.tests.test_lazy.rst
    - sqlobject.inheritance.tests.test_indexes.rst
    - sqlobject.boundattributes.rst
    - sqlobject.versioning.test.test_version.rst
    - sqlobject.tests.test_views.rst
    - sqlobject.tests.test_basic.rst
  - News.rst
  - europython
    - main.css
    - person.py
    - europython_sqlobj.py
  - community.rst
  - links.rst
  - FAQ.rst
  - News4.rst
  - News3.rst
  - Versioning.rst
  - SQLObject.rst
  - Inheritance.rst
  - index.rst
  - conf.py
  - test.py
  - News2.rst
  - genapidocs
  - .gitignore
  - interface.py
  - Views.rst
- MANIFEST.in
- devscripts
  - sqlobject.org-sitemapconfig.xml
  - test-sqlobject.cmd
  - push-all
  - RELEASE-CHECKLIST
  - test-split.sh
  - split.sh
  - setup
  - tox-select-envs
  - sftp-frs
  - publish-docs
  - cleanup
  - add-remotes
  - build-all-docs
  - prerelease
  - flake8
    - run
    - .gitignore
    - split
  - null-merge
  - CI
    - validators.py
  - release
  - branch
  - tox-select-envs.cmd
  - git-hooks
    - post-rewrite
    - post-checkout
    - post-merge
  - prerelease-tag
  - postrelease
  - build-docs
  - sftp-web
  - BRANCH-CHECKLIST
  - requirements
    - requirements_tests.txt
    - requirements_docs.txt
    - requirements_pygresql.txt
    - requirements_tox.txt
    - requirements.txt
    - requirements_dev.txt
- tox.ini

"""
Import from a CSV file or directory of files.

CSV files should have a header line that lists columns.  Headers can
also be appended with ``:type`` to indicate the type of the field.
``escaped`` is the default, though it can be overridden by the importer.
Supported types:

``:python``:
    A python expression, run through ``eval()``.  This can be a
    security risk, pass in ``allow_python=False`` if you don't want to
    allow it.

``:int``:
    Integer

``:float``:
    Float

``:str``:
    String

``:escaped``:
    A string with backslash escapes (note that you don't put quotation
    marks around the value)

``:base64``:
    A base64-encoded string

``:date``:
    ISO date, like YYYY-MM-DD; this can also be ``NOW+days`` or
    ``NOW-days``

``:datetime``:
    ISO date/time like YYYY-MM-DDTHH:MM:SS (either T or a space can be
    used to separate the time, and seconds are optional).  This can
    also be ``NOW+seconds`` or ``NOW-seconds``

``:bool``:
    Converts true/false/t/f/yes/no/y/n/on/off/1/0 (case-insensitive,
    surrounding whitespace ignored) to a boolean value

``:ref``:
    This will be resolved to the ID of the object named in this column
    (None if the column is empty).  @@: Since there's no ordering,
    there's no way to promise the object already exists.

You can also get back references to the objects if you have a special
``[name]`` column.

Any column named ``[comment]`` or with no name will be ignored.

In any column you can put ``[default]`` to exclude the value and use
whatever default the class wants.  ``[null]`` will use NULL.

Lines that begin with ``[comment]`` are ignored.
"""

import csv
from datetime import datetime, date, timedelta
import os
import time
import types

# Names exported by ``from ... import *``.
__all__ = ['load_csv_from_directory',
           'load_csv',
           'create_data']


# Column type used when a header carries no ``:type`` suffix and the
# caller does not supply its own ``default_type``.
DEFAULT_TYPE = 'escaped'


def create_data(data, class_getter, keyorder=None):
    """
    Create the objects described by ``data``, which is the return
    value from ``load_csv()`` (a dictionary mapping class names to
    lists of keyword-argument dictionaries).

    Classes will be resolved with the callable ``class_getter``; or if
    ``class_getter`` is a module then the class names will be
    attributes of that.

    Returns a dictionary of ``{object_name: object(s)}``, using the
    names from the ``[name]`` columns (if there are any).  If a name
    is used multiple times, you get a list of objects, not a single
    object.

    If ``keyorder`` is given, then the classes will be processed in
    that order.  It can be a list/tuple of names (classes not listed
    are processed afterwards), or an old-style two-argument comparison
    function.  If not given and ``class_getter`` is a module with a
    ``soClasses`` attribute, the names of those classes give the
    order.  Otherwise class names are processed in sorted order.

    Raises ``ValueError`` when a ``:ref`` value names an object that
    has not been created yet, or a name shared by several objects.

    Note that the row dictionaries in ``data`` are modified in place
    (references are replaced by ids and ``[name]`` is removed).
    """
    # Imported here so the block works on both Python 2.7 and 3.
    from functools import cmp_to_key

    objects = {}
    is_module = isinstance(class_getter, types.ModuleType)
    if not keyorder and is_module and hasattr(class_getter, 'soClasses'):
        keyorder = [c.__name__ for c in class_getter.soClasses]

    if not keyorder:
        # sorted() works on dict views, unlike ``data.keys().sort()``.
        classnames = sorted(data)
    elif isinstance(keyorder, (list, tuple)):
        classnames = []
        seen = set()
        # Requested names first (each at most once) ...
        for name in keyorder:
            if name in data and name not in seen:
                seen.add(name)
                classnames.append(name)
        # ... then whatever else the data contains.
        for name in data:
            if name not in seen:
                seen.add(name)
                classnames.append(name)
    else:
        # ``keyorder`` is a cmp-style function; list.sort(cmp) no
        # longer exists on Python 3, so adapt it to a key function.
        classnames = sorted(data, key=cmp_to_key(keyorder))

    for classname in classnames:
        items = data[classname]
        if not items:
            continue
        if is_module:
            soClass = getattr(class_getter, classname)
        else:
            soClass = class_getter(classname)
        for item in items:
            # Iterate over a snapshot because values are replaced below.
            for key, value in list(item.items()):
                if isinstance(value, Reference):
                    resolved = objects.get(value.name)
                    if not resolved:
                        raise ValueError(
                            "Object reference to %r does not have target"
                            % value.name)
                    elif (isinstance(resolved, list) and len(resolved) > 1):
                        raise ValueError(
                            "Object reference to %r is ambiguous (got %r)"
                            % (value.name, resolved))
                    item[key] = resolved.id
            if '[name]' in item:
                name = item.pop('[name]').strip()
            else:
                name = None
            inst = soClass(**item)
            if name:
                if name in objects:
                    # A repeated name turns the entry into a list.
                    if isinstance(objects[name], list):
                        objects[name].append(inst)
                    else:
                        objects[name] = [objects[name], inst]
                else:
                    objects[name] = inst
    return objects


def load_csv_from_directory(directory,
                            allow_python=True, default_type=DEFAULT_TYPE,
                            allow_multiple_classes=True):
    """
    Load the data from all the ``*.csv`` files in a directory.

    The base name of each file (without the extension) is passed to
    ``load_csv`` as the default class for that file.  The remaining
    arguments are forwarded to ``load_csv`` unchanged.

    Returns data just like ``load_csv`` does; rows for the same class
    coming from several files are concatenated.  Files are read in
    sorted filename order so the result does not depend on the
    ordering of ``os.listdir()``.

    NOTE(review): earlier documentation mentioned ``general.csv`` for
    data not associated with a class; nothing in this function treats
    that name specially.

    This might cause problems on case-insensitive filesystems.
    """
    results = {}
    for filename in sorted(os.listdir(directory)):
        base, ext = os.path.splitext(filename)
        if ext.lower() != '.csv':
            continue
        path = os.path.join(directory, filename)
        if str is bytes:
            # Python 2: the csv module expects a binary file.
            f = open(path, 'rb')
        else:
            # Python 3: the csv module expects a text file opened with
            # newline='' so that it can handle line endings itself.
            f = open(path, 'r', newline='')
        # Close the file even when load_csv() raises.
        with f:
            data = load_csv(csv.reader(f), allow_python=allow_python,
                            default_type=default_type,
                            default_class=base,
                            allow_multiple_classes=allow_multiple_classes)
        for classname, items in data.items():
            results.setdefault(classname, []).extend(items)
    return results


def load_csv(csvreader, allow_python=True, default_type=DEFAULT_TYPE,
             default_class=None, allow_multiple_classes=True):
    """
    Read rows from ``csvreader`` and coerce every cell according to
    its column header.

    Returns a dictionary mapping each class name to a list of row
    dictionaries (``{column_name: coerced_value}``).

    A row whose first cell is ``CLASS:`` switches to the class named
    in the second cell; the next non-blank row is then parsed as the
    header row for that class.  ``default_class`` is used until the
    first ``CLASS:`` row.  Blank rows and data rows whose first cell
    is ``[comment]`` are skipped.

    Raises ``ValueError`` for a ``CLASS:`` row when
    ``allow_multiple_classes`` is false, for a ``CLASS:`` row without
    a class name, and for data that appears before any class is known.
    """
    results = {}
    headers = None
    classname = default_class

    for row in csvreader:
        if not any(cell.strip() for cell in row):
            # Nothing but whitespace on this line.
            continue

        # The row has at least one non-blank cell, so row[0] exists.
        if row[0].strip() == 'CLASS:':
            if not allow_multiple_classes:
                raise ValueError(
                    "CLASS: line in CSV file, but multiple classes "
                    "are not allowed in this file (line: %r)" % row)
            if len(row) < 2:
                raise ValueError(
                    "CLASS: in line in CSV file, with no class name "
                    "in next column (line: %r)" % row)
            classname = row[1]
            # A new class brings its own header row.
            headers = None
            continue

        if not classname:
            raise ValueError(
                "No CLASS: line given, and there is no default class "
                "for this file (line: %r)" % row)

        if headers is None:
            headers = _parse_headers(row, default_type,
                                     allow_python=allow_python)
            continue

        if row[0] == '[comment]':
            continue

        # Short rows are padded with empty cells; zip() below drops
        # any cells beyond the last header.
        row += [''] * (len(headers) - len(row))
        converted = {}
        for value, (name, coercer, args) in zip(row, headers):
            # Ignored column, or the class default was requested.
            if name is None or value == '[default]':
                continue
            if value == '[null]':
                converted[name] = None
            else:
                converted[name] = coercer(value, *args)

        results.setdefault(classname, []).append(converted)

    return results


def _parse_headers(header_row, default_type, allow_python=True):
    headers = []
    for name in header_row:
        original_name = name
        if ':' in name:
            name, type = name.split(':', 1)
        else:
            type = default_type
        if type == 'python' and not allow_python:
            raise ValueError(
                ":python header given when python headers are not allowed "
                "(with header %r)" % original_name)
        name = name.strip()
        if name == '[comment]' or not name:
            headers.append((None, None, None))
            continue
        type = type.strip().lower()
        if '(' in type:
            type, arg = type.split('(', 1)
            if not arg.endswith(')'):
                raise ValueError(
                    "Arguments (in ()'s) do not end with ): %r"
                    % original_name)
            args = (arg[:-1],)
        else:
            args = ()
        if name == '[name]':
            type = 'str'
        coercer, args = get_coercer(type)
        headers.append((name, coercer, args))
    return headers


# Registry mapping a type name to ``(coercer, extra_args)``; it is
# filled through register_coercer().
_coercers = {}


def get_coercer(type):
    """
    Return the ``(coercer, extra_args)`` pair registered for ``type``.

    Raises ``ValueError`` (listing the known type names) when nothing
    has been registered under that name.
    """
    entry = _coercers.get(type)
    if entry is None:
        known = ', '.join(_coercers.keys())
        raise ValueError(
            "Coercion type %r not known (I know: %s)"
            % (type, known))
    return entry


def register_coercer(type, coercer, *args):
    """
    Register ``coercer`` under the name ``type``, replacing any
    earlier registration.  Extra positional ``args`` are stored next
    to the coercer and returned with it by ``get_coercer``.
    """
    _coercers[type] = coercer, args


def identity(v):
    """Return ``v`` unchanged (coercer for plain string columns)."""
    return v

# ``:str`` and ``:string`` columns keep the cell text as it is.
register_coercer('str', identity)
register_coercer('string', identity)


def decode_string(v, encoding):
    """
    Decode ``v`` with the codec named ``encoding``.

    On Python 2 this is simply ``v.decode(encoding)``.  On Python 3,
    where ``str`` has no ``decode`` method and the ``string_escape``
    codec is gone:

    * ``'string_escape'`` resolves backslash escapes and returns text;
    * any other codec (for example ``'base64'``) is applied through
      ``codecs.decode`` to the UTF-8 bytes of ``v`` (or to ``v``
      itself when it already is bytes), so ``'base64'`` returns bytes.

    Raises whatever the codec raises for malformed input.
    """
    # Imported here so the block does not depend on new file-level
    # imports.
    import codecs

    if str is bytes:
        # Python 2: byte strings decode natively with these codecs.
        return v.decode(encoding)

    is_text = isinstance(v, str)
    if encoding == 'string_escape':
        # Characters outside Latin-1 are turned into \uXXXX escapes
        # first, so unicode_escape restores them untouched while still
        # resolving the escapes written in the cell.
        raw = v.encode('latin-1', 'backslashreplace') if is_text else v
        return raw.decode('unicode_escape')
    raw = v.encode('utf-8') if is_text else v
    return codecs.decode(raw, encoding)

# The trailing argument is stored with the coercer and is passed to
# decode_string() as its ``encoding`` parameter for every cell.
register_coercer('escaped', decode_string, 'string_escape')
register_coercer('strescaped', decode_string, 'string_escape')
register_coercer('base64', decode_string, 'base64')

# Numeric columns go straight through the builtin constructors.
register_coercer('int', int)
register_coercer('float', float)


def parse_python(v):
    """
    Evaluate ``v`` as a Python expression and return the result.

    SECURITY: this uses ``eval()``.  Empty globals/locals dictionaries
    are passed, but that is not a sandbox; only use ``:python`` columns
    with trusted CSV data.
    """
    empty_globals = {}
    empty_locals = {}
    return eval(v, empty_globals, empty_locals)

# ``:python`` columns are evaluated by parse_python().
register_coercer('python', parse_python)


def parse_date(v):
    """
    Parse a ``:date`` cell.

    Accepts an ISO date (``YYYY-MM-DD``), or ``NOW+days`` /
    ``NOW-days`` for a date relative to today.  Returns a
    ``datetime.date``, or ``None`` for an empty or blank cell.

    Raises ``ValueError`` when the value matches neither form.
    """
    v = v.strip()
    if not v:
        return None
    if v.startswith(('NOW-', 'NOW+')):
        # v[3:] keeps the sign, so int() gives a signed day offset.
        return date.today() + timedelta(int(v[3:]))
    # Build the date straight from the parsed fields.  Going through
    # time.mktime()/fromtimestamp() depends on the local timezone and
    # can overflow for dates outside the platform's time_t range.
    return datetime.strptime(v, '%Y-%m-%d').date()

# ``:date`` columns are parsed by parse_date().
register_coercer('date', parse_date)


def parse_datetime(v):
    """
    Parse a ``:datetime`` cell.

    Accepts ``YYYY-MM-DDTHH:MM[:SS]`` (a space may be used instead of
    the ``T``), or ``NOW+seconds`` / ``NOW-seconds`` for a time
    relative to now.  Returns a naive ``datetime.datetime``, or
    ``None`` for an empty or blank cell.

    Raises ``ValueError`` when the value matches none of these forms.
    """
    v = v.strip()
    if not v:
        return None
    if v.startswith(('NOW-', 'NOW+')):
        # v[3:] keeps the sign, so int() gives a signed offset.
        return datetime.now() + timedelta(seconds=int(v[3:]))
    fmts = ('%Y-%m-%dT%H:%M:%S',
            '%Y-%m-%d %H:%M:%S',
            '%Y-%m-%dT%H:%M',
            '%Y-%m-%d %H:%M')
    for fmt in fmts:
        try:
            # Use the parsed fields directly.  A round trip through
            # time.mktime() shifts wall-clock times that fall into a
            # local DST gap and can overflow for far-away dates.
            return datetime.strptime(v, fmt)
        except ValueError:
            pass
    raise ValueError(
        "Value is not an ISO date/time "
        "(YYYY-MM-DD[T ]HH:MM[:SS]): %r" % v)

# ``:datetime`` columns are parsed by parse_datetime().
register_coercer('datetime', parse_datetime)


class Reference(object):
    """
    Placeholder produced for a non-empty ``:ref`` cell; it only records
    the ``name`` of the object that the cell refers to.
    """

    def __init__(self, name):
        # Name of the target object, exactly as written in the cell.
        self.name = name


def parse_ref(v):
    """
    Parse a ``:ref`` cell: return ``None`` for an empty or blank cell,
    otherwise a ``Reference`` wrapping the cell text exactly as given
    (it is not stripped).
    """
    return Reference(v) if v.strip() else None

# ``:ref`` columns are parsed by parse_ref().
register_coercer('ref', parse_ref)


def parse_bool(v):
    """
    Parse a ``:bool`` cell.

    Surrounding whitespace and letter case are ignored.  Returns
    ``True`` for y/yes/t/true/on/1 and ``False`` for
    n/no/f/false/off/0.

    Raises ``ValueError`` for anything else, including an empty cell.
    """
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')
    token = v.strip().lower()
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise ValueError(
        "Value is not boolean-like: %r" % token)

# ``:bool`` and ``:boolean`` columns are parsed by parse_bool().
register_coercer('bool', parse_bool)
register_coercer('boolean', parse_bool)