# Python source code of convert

import csv
import sys
import numpy as np
import pandas as pd
from collections import OrderedDict
from toolz import itemmap, keymap, valmap

from .utils import decode_escapes, decode_array


# Type translation tables used by the functions in this module.
#
# MAPPING goes from a numpy/pandas dtype name (any string accepted by
# ``np.dtype``) to the column type name used on the other side of the
# conversion (the "CH" in PD2CH / CH2PD -- presumably ClickHouse; confirm).
MAPPING = {'object': 'String',
           'uint64': 'UInt64',
           'uint32': 'UInt32',
           'uint16': 'UInt16',
           'uint8': 'UInt8',
           'float64': 'Float64',
           'float32': 'Float32',
           'int64': 'Int64',
           'int32': 'Int32',
           'int16': 'Int16',
           'int8': 'Int8',
           'datetime64[D]': 'Date',
           'datetime64[ns]': 'DateTime'}

# Same table keyed by ``np.dtype`` objects instead of strings, so it can be
# queried directly with the values of ``DataFrame.dtypes``.
PD2CH = keymap(np.dtype, MAPPING)
# Inverse table: column type name -> dtype name (each key/value pair swapped).
CH2PD = itemmap(reversed, MAPPING)
# These two type names have no dtype of their own and are read as 'object'.
CH2PD['Null'] = 'object'
CH2PD['Nothing'] = 'object'

# Types that also get a ``Nullable(<type>)`` alias in CH2PD below.
# 'Date' and 'DateTime' are not in this list, so their Nullable variants are
# not registered here.
NULLABLE_COLS = ['UInt64', 'UInt32', 'UInt16', 'UInt8', 'Float64', 'Float32',
                 'Int64', 'Int32', 'Int16', 'Int8', 'String']

# ``Nullable(T)`` maps to the same dtype name as the plain ``T``.
for col in NULLABLE_COLS:
    CH2PD['Nullable({})'.format(col)] = CH2PD[col]
# True when running under Python 3.
PY3 = sys.version_info[0] == 3


def normalize(df, index=True):
    """Prepare a DataFrame for export and derive its column type mapping.

    The input frame is never modified: every change is made on a new frame,
    which is what gets returned.

    :param df: the ``pandas.DataFrame`` to normalize.
    :param index: when true, the index is moved into regular column(s) via
        ``reset_index()`` so it becomes part of the result.
    :returns: a ``(dtypes, df)`` tuple. ``dtypes`` maps each column name, in
        column order, to the type name found in ``PD2CH``; ``df`` is the
        frame with boolean columns cast to ``uint8``.
    :raises ValueError: if a column's dtype has no entry in ``PD2CH``.
    """
    if index:
        df = df.reset_index()

    bool_cols = list(df.select_dtypes([bool]).columns)
    if bool_cols:
        # astype() returns a new frame. Assigning the converted columns back
        # with ``df[col] = ...`` would write into the caller's DataFrame
        # whenever ``index`` is false (no reset_index() copy was made).
        df = df.astype(dict.fromkeys(bool_cols, 'uint8'))

    dtypes = valmap(PD2CH.get, OrderedDict(df.dtypes))
    if None in dtypes.values():
        raise ValueError('Unknown type mapping in dtypes: {}'.format(dtypes))

    return dtypes, df


def to_csv(df):
    """Serialize ``df`` to CSV text without a header row or index column.

    Non-numeric fields are quoted and a backslash is used as the escape
    character. On Python 3 the text is encoded to utf-8 bytes; otherwise the
    value produced by pandas is returned as is.
    """
    text = df.to_csv(index=False, header=False,
                     escapechar='\\', quoting=csv.QUOTE_NONNUMERIC,
                     encoding='utf-8')
    return text.encode('utf-8') if PY3 else text


def to_dataframe(lines, **kwargs):
    """Read a tab-separated byte stream with two header rows into a DataFrame.

    ``lines`` is a binary file-like object. Its first line holds the column
    names and its second line the column type names, both tab-separated and
    decoded as utf-8. Everything after that is handed to ``pandas.read_csv``.
    Extra keyword arguments are forwarded to ``pandas.read_csv`` unchanged.
    """
    def read_header_row():
        # One header line -> list of tab-separated fields.
        return lines.readline().decode('utf-8').strip().split('\t')

    names = read_header_row()
    types = read_header_row()

    # Each column ends up in exactly one of these three collections.
    converters = {}
    parse_dates = []
    dtypes = {}
    for name, chtype in zip(names, types):
        if chtype.startswith("Array("):
            converters[name] = decode_array
            continue

        # Type names missing from CH2PD are treated like 'object' columns.
        dtype = CH2PD.get(chtype, 'object')
        if dtype == 'object':
            converters[name] = decode_escapes
        elif dtype.startswith('datetime'):
            parse_dates.append(name)
        else:
            dtypes[name] = dtype

    # No value is interpreted as NaN: the default NA markers are disabled
    # and no custom ones are supplied.
    return pd.read_csv(lines, sep='\t', names=names, header=None,
                       dtype=dtypes, converters=converters,
                       parse_dates=parse_dates,
                       keep_default_na=False, na_values=set(), **kwargs)


def partition(df, chunksize=1000):
    """Yield consecutive row slices of ``df`` holding at most ``chunksize`` rows.

    Slices are taken with ``df.iloc`` in row order; the last one may be
    shorter. Nothing is yielded for a frame with no rows.
    """
    total = df.shape[0]
    # One more step than full chunks, to cover a trailing partial chunk.
    steps = int(total / chunksize) + 1
    for step in range(steps):
        lo = step * chunksize
        hi = min(lo + chunksize, total)
        if hi <= lo:
            # Reached the end (row count is an exact multiple of chunksize).
            break
        yield df.iloc[lo:hi]