Python csv.field_size_limit() Examples

The following are 30 code examples of csv.field_size_limit(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module csv, or try the search function.
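Before the project examples, here is a minimal, self-contained sketch (illustrative only, not taken from any of the projects below) of the pattern most of them follow: read the current limit, raise it as far as the platform allows, and restore it afterwards. The function name read_wide_csv is a hypothetical example.

import csv
import sys

def read_wide_csv(path):
    # Save the current limit (the default is 131072 bytes) so it can be restored later.
    old_limit = csv.field_size_limit()
    new_limit = sys.maxsize
    # On some platforms (e.g. Windows or 32-bit builds) the limit must fit in a C long,
    # so keep shrinking the value until csv.field_size_limit() accepts it.
    while True:
        try:
            csv.field_size_limit(new_limit)
            break
        except OverflowError:
            new_limit = int(new_limit / 10)
    try:
        with open(path, newline='') as f:
            return [row for row in csv.reader(f)]
    finally:
        # Restore the previous limit so the change does not leak to other callers.
        csv.field_size_limit(old_limit)

As several of the test-suite examples below show, restoring the saved limit in a finally block keeps the change to the module-level setting from leaking into other code.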
Example #1
Source File: test_csv.py    From oss-ftp with MIT License
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #2
Source File: test_csv.py    From gcblue with BSD 3-Clause "New" or "Revised" License
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #3
Source File: lookups_util.py    From Splunking-Crime with GNU Affero General Public License v3.0
def load_lookup_file_from_disk(file_path):
    """
    parse the lookup file from the given path and return the result

    Args:
        file_path (string): the path to the lookup file

    Returns:
        lookup_data (dict): result from the csv parser
    """
    if not file_exists(file_path):
        raise RuntimeError('Not valid filepath: {}'.format(file_path))

    try:
        with open(file_path, mode='r') as f:
            reader = csv.DictReader(f)
            csv.field_size_limit(CSV_FILESIZE_LIMIT)
            lookup_data = reader.next()
    except Exception as e:
        raise RuntimeError('Error reading model file: %s, %s' % (file_path, str(e)))

    return lookup_data 
Example #4
Source File: resources.py    From SummaryRank with MIT License
def _load_sentence_file(self, filename):
        csv.field_size_limit(sys.maxsize)
        opener = gzip.open if filename.endswith('.gz') else open
        entities = dict()
        with opener(filename) as csvfile:
                reader = csv.reader(csvfile, delimiter='\t',quoting=csv.QUOTE_NONE)
                for row in reader:
                        qid = row[0].strip()
                        sentence_json = row[1].strip()
                        if sentence_json:
                                payload = json.loads(sentence_json)
                                annotations = payload['annotations']
                                sentence_entities = [ x['id'] for x in annotations]
                                sentence_entities = [ str(x) for x in sentence_entities]
                                entities[qid] = sentence_entities
                        else:
                                entities[qid] = []
        return entities 
Example #5
Source File: bulkloader.py    From python-compat-runtime with Apache License 2.0
def Records(self):
    """Reads the CSV data file and generates row records.

    Yields:
      Lists of strings

    Raises:
      ResumeError: If the progress database and data file indicate a different
        number of rows.
    """
    csv_file = self.openfile(self.csv_filename, 'rb')
    reader = self.create_csv_reader(csv_file, skipinitialspace=True)
    try:

      for record in reader:
        yield record
    except csv.Error, e:
      if e.args and e.args[0].startswith('field larger than field limit'):
        raise FieldSizeLimitError(csv.field_size_limit())
      else:
        raise 
Example #6
Source File: test_csv.py    From ironpython3 with Apache License 2.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #7
Source File: test_csv.py    From Fluid-Designer with GNU General Public License v3.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #8
Source File: index.py    From brotab with MIT License
def index(sqlite_filename, tsv_filename):
    logger.info('Reading tsv file %s', tsv_filename)
    # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
    # https://github.com/balta2ar/brotab/issues/25
    # It should work on Python 3 and Python 2, on any CPU / OS.
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))

    with open(tsv_filename, encoding='utf-8') as tsv_file:
        lines = [tuple(line) for line in csv.reader(tsv_file, delimiter='\t',
                                                    quoting=csv.QUOTE_NONE)]

    logger.info(
        'Creating sqlite DB filename %s from tsv %s (%s lines)',
        sqlite_filename, tsv_filename, len(lines))
    conn = sqlite3.connect(sqlite_filename)
    cursor = conn.cursor()
    with suppress(sqlite3.OperationalError):
        cursor.execute('drop table tabs;')
    cursor.execute(
        'create virtual table tabs using fts5('
        '    tab_id, title, url, body, tokenize="porter unicode61");')
    cursor.executemany('insert into tabs values (?, ?, ?, ?)', lines)
    conn.commit()
    conn.close() 
Example #9
Source File: test_csv.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #10
Source File: test_csv.py    From BinderFilter with MIT License
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #11
Source File: test_csv.py    From medicare-demo with Apache License 2.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #12
Source File: training.py    From caml-mimic with MIT License
def init(args):
    """
        Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    #need to handle really large text fields
    csv.field_size_limit(sys.maxsize)

    #load vocab and other lookups
    desc_embed = args.lmbda > 0
    print("loading lookups...")
    dicts = datasets.load_lookups(args, desc_embed=desc_embed)

    model = tools.pick_model(args, dicts)
    print(model)

    if not args.test_model:
        optimizer = optim.Adam(model.parameters(), weight_decay=args.weight_decay, lr=args.lr)
    else:
        optimizer = None

    params = tools.make_param_dict(args)
    
    return args, model, optimizer, params, dicts 
Example #13
Source File: test_csv.py    From ironpython2 with Apache License 2.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #14
Source File: load_data.py    From WeSTClass with Apache License 2.0
def read_file(data_dir, with_evaluation):
    data = []
    target = []
    with open(join(data_dir, 'dataset.csv'), 'rt', encoding='utf-8') as csvfile:
        csv.field_size_limit(500 * 1024 * 1024)
        reader = csv.reader(csvfile)
        for row in reader:
            if data_dir == './agnews':
                doc = row[1] + '. ' + row[2]
                data.append(doc)
                target.append(int(row[0]) - 1)
            elif data_dir == './yelp':
                data.append(row[1])
                target.append(int(row[0]) - 1)
    if with_evaluation:
        y = np.asarray(target)
        assert len(data) == len(y)
        assert set(range(len(np.unique(y)))) == set(np.unique(y))
    else:
        y = None
    return data, y 
Example #15
Source File: bulkloader.py    From browserscope with Apache License 2.0
def Records(self):
    """Reads the CSV data file and generates row records.

    Yields:
      Lists of strings

    Raises:
      ResumeError: If the progress database and data file indicate a different
        number of rows.
    """
    csv_file = self.openfile(self.csv_filename, 'rb')
    reader = self.create_csv_reader(csv_file, skipinitialspace=True)
    try:

      for record in reader:
        yield record
    except csv.Error, e:
      if e.args and e.args[0].startswith('field larger than field limit'):
        raise FieldSizeLimitError(csv.field_size_limit())
      else:
        raise 
Example #16
Source File: test_csv.py    From CTFCrackTools-V2 with GNU General Public License v3.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #17
Source File: dataset.py    From TopicNet with MIT License
def _increase_csv_field_max_size():
    """Makes document entry in dataset as big as possible

    References
    ----------
    https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072

    """
    max_int = sys.maxsize

    while True:
        try:
            csv.field_size_limit(max_int)

            break

        except OverflowError:
            max_int = int(max_int / 10) 
Example #18
Source File: test_csv.py    From CTFCrackTools with GNU General Public License v3.0
def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            csv.field_size_limit(limit) 
Example #19
Source File: repdocs.py    From dblp with MIT License
def read_lcc_author_repdocs(self):
        """Read and return an iterator over the author repdoc corpus, which excludes
        the authors not in the LCC.
        """
        author_repdoc_file, _, lcc_idmap_file = self.input()

        with lcc_idmap_file.open() as lcc_idmap_f:
            lcc_author_df = pd.read_csv(lcc_idmap_f, header=0, usecols=(0,))
            lcc_author_ids = lcc_author_df['author_id'].values

        csv.field_size_limit(sys.maxint)
        records = util.iter_csv_fwrapper(author_repdoc_file)
        return (doc.split('|') for author_id, doc in records
                if int(author_id) in lcc_author_ids) 
Example #20
Source File: sync.py    From tap-s3-csv with GNU Affero General Public License v3.0
def sync_table_file(config, s3_path, table_spec, stream):
    LOGGER.info('Syncing file "%s".', s3_path)

    bucket = config['bucket']
    table_name = table_spec['table_name']

    s3_file_handle = s3.get_file_handle(config, s3_path)
    # We observed data who's field size exceeded the default maximum of
    # 131072. We believe the primary consequence of the following setting
    # is that a malformed, wide CSV would potentially parse into a single
    # large field rather than giving this error, but we also think the
    # chances of that are very small and at any rate the source data would
    # need to be fixed. The other consequence of this could be larger
    # memory consumption but that's acceptable as well.
    csv.field_size_limit(sys.maxsize)
    iterator = singer_encodings_csv.get_row_iterator(
        s3_file_handle._raw_stream, table_spec) #pylint:disable=protected-access

    records_synced = 0

    for row in iterator:
        custom_columns = {
            s3.SDC_SOURCE_BUCKET_COLUMN: bucket,
            s3.SDC_SOURCE_FILE_COLUMN: s3_path,

            # index zero, +1 for header row
            s3.SDC_SOURCE_LINENO_COLUMN: records_synced + 2
        }
        rec = {**row, **custom_columns}

        with Transformer() as transformer:
            to_write = transformer.transform(rec, stream['schema'], metadata.to_map(stream['metadata']))

        singer.write_record(table_name, to_write)
        records_synced += 1

    return records_synced 
Example #21
Source File: bulkload_deprecated.py    From python-compat-runtime with Apache License 2.0
def Load(self, kind, data):
    """Parses CSV data, uses a Loader to convert to entities, and stores them.

    On error, fails fast. Returns a "bad request" HTTP response code and
    includes the traceback in the output.

    Args:
      kind: a string containing the entity kind that this loader handles
      data: a string containing the CSV data to load

    Returns:
      tuple (response code, output) where:
        response code: integer HTTP response code to return
        output: string containing the HTTP response body
    """

    data = data.encode('utf-8')
    Validate(kind, basestring)
    Validate(data, basestring)
    output = []

    try:
      loader = Loader.RegisteredLoaders()[kind]
    except KeyError:
      output.append('Error: no Loader defined for kind %s.' % kind)
      return (httplib.BAD_REQUEST, ''.join(output))

    buffer = StringIO.StringIO(data)
    reader = csv.reader(buffer, skipinitialspace=True)

    try:
      csv.field_size_limit(800000)
    except AttributeError:

      pass

    return self.LoadEntities(self.IterRows(reader), loader) 
Example #22
Source File: bulkloader.py    From python-compat-runtime with Apache License 2.0
def __init__(self, limit):
    self.message = """
A field in your CSV input file has exceeded the current limit of %d.

You can raise this limit by adding the following lines to your config file:

import csv
csv.field_size_limit(new_limit)

where new_limit is a number larger than the size in bytes of the largest
field in your CSV.
""" % limit
    Error.__init__(self, self.message) 
Example #23
Source File: bulkload_client.py    From python-compat-runtime with Apache License 2.0
def ContentGenerator(csv_file,
                     batch_size,
                     create_csv_reader=csv.reader,
                     create_csv_writer=csv.writer):
  """Retrieves CSV data up to a batch size at a time.

  Args:
    csv_file: A file-like object for reading CSV data.
    batch_size: Maximum number of CSV rows to yield on each iteration.
    create_csv_reader, create_csv_writer: Used for dependency injection.

  Yields:
    Tuple (entity_count, csv_content) where:
      entity_count: Number of entities contained in the csv_content. Will be
        less than or equal to the batch_size and greater than 0.
      csv_content: String containing the CSV content containing the next
        entity_count entities.
  """
  try:
    csv.field_size_limit(800000)
  except AttributeError:

    pass

  reader = create_csv_reader(csv_file, skipinitialspace=True)
  exhausted = False

  while not exhausted:
    rows_written = 0
    content = StringIO.StringIO()
    writer = create_csv_writer(content)
    try:
      for i in xrange(batch_size):
        row = reader.next()
        writer.writerow(row)
        rows_written += 1
    except StopIteration:
      exhausted = True

    if rows_written > 0:
      yield rows_written, content.getvalue() 
Example #24
Source File: csv_utils.py    From bitcoin-etl with MIT License
def set_max_field_size_limit():
    max_int = sys.maxsize
    decrement = True
    while decrement:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.

        decrement = False
        try:
            csv.field_size_limit(max_int)
        except OverflowError:
            max_int = int(max_int / 10)
            decrement = True 
Example #25
Source File: utils.py    From text with BSD 3-Clause "New" or "Revised" License
def unicode_csv_reader(unicode_csv_data, **kwargs):
    r"""Since the standard csv library does not handle unicode in Python 2, we need a wrapper.
    Borrowed and slightly modified from the Python docs:
    https://docs.python.org/2/library/csv.html#csv-examples

    Arguments:
        unicode_csv_data: unicode csv data (see example below)

    Examples:
        >>> from torchtext.utils import unicode_csv_reader
        >>> import io
        >>> with io.open(data_path, encoding="utf8") as f:
        >>>     reader = unicode_csv_reader(f)

    """

    # Fix field larger than field limit error
    maxInt = sys.maxsize
    while True:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(maxInt)
            break
        except OverflowError:
            maxInt = int(maxInt / 10)
    csv.field_size_limit(maxInt)

    for line in csv.reader(unicode_csv_data, **kwargs):
        yield line 
Example #26
Source File: utils.py    From audio with BSD 2-Clause "Simplified" License
def unicode_csv_reader(unicode_csv_data: TextIOWrapper, **kwargs: Any) -> Any:
    r"""Since the standard csv library does not handle unicode in Python 2, we need a wrapper.
    Borrowed and slightly modified from the Python docs:
    https://docs.python.org/2/library/csv.html#csv-examples
    Args:
        unicode_csv_data (TextIOWrapper): unicode csv data (see example below)

    Examples:
        >>> from torchaudio.datasets.utils import unicode_csv_reader
        >>> import io
        >>> with io.open(data_path, encoding="utf8") as f:
        >>>     reader = unicode_csv_reader(f)
    """

    # Fix field larger than field limit error
    maxInt = sys.maxsize
    while True:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(maxInt)
            break
        except OverflowError:
            maxInt = int(maxInt / 10)
    csv.field_size_limit(maxInt)

    for line in csv.reader(unicode_csv_data, **kwargs):
        yield line 
Example #27
Source File: import_geonames.py    From EpiTator with Apache License 2.0
def read_geonames_csv():
    print("Downloading geoname data from: " + GEONAMES_ZIP_URL)
    try:
        url = request.urlopen(GEONAMES_ZIP_URL)
    except URLError:
        print("If you are operating behind a firewall, try setting the HTTP_PROXY/HTTPS_PROXY environment variables.")
        raise
    zipfile = ZipFile(BytesIO(url.read()))
    print("Download complete")
    # Loading geonames data may cause errors without setting csv.field_size_limit:
    if sys.platform == "win32":
        max_c_long_on_windows = (2**32 / 2) - 1
        csv.field_size_limit(max_c_long_on_windows)
    else:
        csv.field_size_limit(sys.maxint if six.PY2 else six.MAXSIZE)
    with zipfile.open('allCountries.txt') as f:
        reader = unicodecsv.DictReader(f,
                                       fieldnames=[
                                           k for k, v in geonames_field_mappings],
                                       encoding='utf-8',
                                       delimiter='\t',
                                       quoting=csv.QUOTE_NONE)
        for d in reader:
            d['population'] = parse_number(d['population'], 0)
            d['latitude'] = parse_number(d['latitude'], 0)
            d['longitude'] = parse_number(d['longitude'], 0)
            if len(d['alternatenames']) > 0:
                d['alternatenames'] = d['alternatenames'].split(',')
            else:
                d['alternatenames'] = []
            yield d 
Example #28
Source File: test_reader.py    From batch-scoring with BSD 3-Clause "New" or "Revised" License
def really_big_fields_enabled(self):
        old_limit = csv.field_size_limit()
        csv.field_size_limit(2 ** 28)
        yield
        csv.field_size_limit(old_limit) 
Example #29
Source File: __init__.py    From starthinker with Apache License 2.0
def csv_to_rows(csv_string):
  if csv_string:
    csv.field_size_limit(sys.maxsize)
    if isinstance(csv_string, str): csv_string = StringIO(csv_string)
    for row in csv.reader(csv_string, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL, skipinitialspace=True, escapechar='\\'):
      yield row 
Example #30
Source File: Sets2Sets.py    From Sets2Sets with Apache License 2.0
def generate_dictionary_BA(path, files, attributes_list):
    # path = '../Minnemudac/'
    # files = ['Coborn_history_order.csv','Coborn_future_order.csv']
    # files = ['BA_history_order.csv', 'BA_future_order.csv']
    # attributes_list = ['MATERIAL_NUMBER']
    dictionary_table = {}
    counter_table = {}
    for attr in attributes_list:
        dictionary = {}
        dictionary_table[attr] = dictionary
        counter_table[attr] = 0

    csv.field_size_limit(sys.maxsize)
    for filename in files:
        count = 0
        with open(path + filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in reader:
                if count == 0:
                    count += 1
                    continue
                key = attributes_list[0]
                if row[2] not in dictionary_table[key]:
                    dictionary_table[key][row[2]] = counter_table[key]
                    counter_table[key] = counter_table[key] + 1
                    count += 1

    print(counter_table)

    total = 0
    for key in counter_table.keys():
        total = total + counter_table[key]

    print('# dimensions of final vector: ' + str(total) + ' | ' + str(count - 1))

    return dictionary_table, total, counter_table