# -*- coding: utf-8 -*-

"""
Wrappers for some loading/saving functionality.

Author: Gertjan van den Burg

"""

import os
import warnings

from ._optional import import_optional_dependency
from .detect import Detector
from .dict_read_write import DictReader
from .exceptions import NoDetectionResult
from .read import reader
from .utils import get_encoding
from .write import writer


def stream_dicts(
    filename, dialect=None, encoding=None, num_chars=None, verbose=False
):
    """Read a CSV file as a generator over dictionaries

    This function streams the rows of the CSV file as dictionaries. The keys
    of the dictionaries are assumed to be in the first row of the CSV file.
    The dialect will be detected automatically, unless it is provided.

    Parameters
    ----------
    filename : str
        Path of the CSV file

    dialect : str, SimpleDialect, or csv.Dialect object
        If the dialect is known, it can be provided here. This function uses
        the Clevercsv :class:`clevercsv.DictReader` object, which supports
        various dialect types (string, SimpleDialect, or csv.Dialect). If
        None, the dialect will be detected.

    encoding : str
        The encoding of the file. If None, it is detected.

    num_chars : int
        Number of characters to use to detect the dialect. If None, use the
        entire file.

        Note that using less than the entire file will speed up detection,
        but can reduce the accuracy of the detected dialect.

    verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: generator
        Returns file as a generator over rows as dictionaries.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            # Fail explicitly (as documented and as stream_table does)
            # instead of handing ``dialect=None`` to the reader.
            if dialect is None:
                raise NoDetectionResult()
            # Detection consumed (part of) the file; rewind before parsing.
            fid.seek(0)
        r = DictReader(fid, dialect=dialect)
        yield from r


def read_dicts(
    filename, dialect=None, encoding=None, num_chars=None, verbose=False
):
    """Read a CSV file as a list of dictionaries

    This function returns the rows of the CSV file as a list of
    dictionaries. The keys of the dictionaries are assumed to be in the
    first row of the CSV file. The dialect will be detected automatically,
    unless it is provided.

    Parameters
    ----------
    filename : str
        Path of the CSV file

    dialect : str, SimpleDialect, or csv.Dialect object
        If the dialect is known, it can be provided here. This function uses
        the Clevercsv :class:`clevercsv.DictReader` object, which supports
        various dialect types (string, SimpleDialect, or csv.Dialect). If
        None, the dialect will be detected.

    encoding : str
        The encoding of the file. If None, it is detected.

    num_chars : int
        Number of characters to use to detect the dialect. If None, use the
        entire file.

        Note that using less than the entire file will speed up detection,
        but can reduce the accuracy of the detected dialect.

    verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: list
        Returns rows of the file as a list of dictionaries.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    return list(
        stream_dicts(
            filename,
            dialect=dialect,
            encoding=encoding,
            num_chars=num_chars,
            verbose=verbose,
        )
    )


def read_as_dicts(
    filename, dialect=None, encoding=None, num_chars=None, verbose=False
):
    """This function is deprecated, use read_dicts instead."""
    warnings.warn(
        "'read_as_dicts' was renamed to 'read_dicts' in version "
        "0.6.3 and will be removed in 0.7.0.",
        FutureWarning,
        # Attribute the warning to the caller, not to this shim.
        stacklevel=2,
    )
    return read_dicts(
        filename,
        dialect=dialect,
        encoding=encoding,
        num_chars=num_chars,
        verbose=verbose,
    )


def read_csv(
    filename,
    dialect=None,
    encoding=None,
    num_chars=None,
    verbose=False,
):
    """This function is deprecated, use read_table instead."""
    warnings.warn(
        "'read_csv' was renamed to 'read_table' in version "
        "0.6.3 and will be removed in 0.7.0.",
        FutureWarning,
        # Attribute the warning to the caller, not to this shim.
        stacklevel=2,
    )
    return read_table(
        filename,
        dialect=dialect,
        encoding=encoding,
        num_chars=num_chars,
        verbose=verbose,
    )


def read_table(
    filename,
    dialect=None,
    encoding=None,
    num_chars=None,
    verbose=False,
):
    """Read a CSV file as a table (a list of lists)

    This is a convenience function that reads a CSV file and returns the
    data as a list of lists (= rows). The dialect will be detected
    automatically, unless it is provided.

    Parameters
    ----------
    filename: str
        Path of the CSV file

    dialect: str, SimpleDialect, or csv.Dialect object
        If the dialect is known, it can be provided here. This function uses
        the CleverCSV :class:`clevercsv.reader` object, which supports
        various dialect types (string, SimpleDialect, or csv.Dialect). If
        None, the dialect will be detected.

    encoding : str
        The encoding of the file. If None, it is detected.

    num_chars : int
        Number of characters to use to detect the dialect. If None, use the
        entire file.

        Note that using less than the entire file will speed up detection,
        but can reduce the accuracy of the detected dialect.

    verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: list
        Returns rows as a list of lists.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    return list(
        stream_table(
            filename,
            dialect=dialect,
            encoding=encoding,
            num_chars=num_chars,
            verbose=verbose,
        )
    )


def stream_csv(
    filename,
    dialect=None,
    encoding=None,
    num_chars=None,
    verbose=False,
):
    """This function is deprecated, use stream_table instead."""
    # This is deliberately a plain function rather than a generator
    # function: the deprecation warning is then emitted when stream_csv is
    # called, not postponed until the first row is requested (or dropped
    # entirely if the generator is never consumed).
    warnings.warn(
        "'stream_csv' was renamed to 'stream_table' in version "
        "0.6.3 and will be removed in 0.7.0.",
        FutureWarning,
        stacklevel=2,
    )
    return stream_table(
        filename,
        dialect=dialect,
        encoding=encoding,
        num_chars=num_chars,
        verbose=verbose,
    )


def stream_table(
    filename,
    dialect=None,
    encoding=None,
    num_chars=None,
    verbose=False,
):
    """Read a CSV file as a generator over rows of a table

    This is a convenience function that reads a CSV file and returns the
    data as a generator of rows. The dialect will be detected automatically,
    unless it is provided.

    Parameters
    ----------
    filename: str
        Path of the CSV file

    dialect: str, SimpleDialect, or csv.Dialect object
        If the dialect is known, it can be provided here. This function uses
        the CleverCSV :class:`clevercsv.reader` object, which supports
        various dialect types (string, SimpleDialect, or csv.Dialect). If
        None, the dialect will be detected.

    encoding : str
        The encoding of the file. If None, it is detected.

    num_chars : int
        Number of characters to use to detect the dialect. If None, use the
        entire file.

        Note that using less than the entire file will speed up detection,
        but can reduce the accuracy of the detected dialect.

    verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: generator
        Returns file as a generator over rows.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            if dialect is None:
                raise NoDetectionResult()
            # Detection consumed (part of) the file; rewind before parsing.
            fid.seek(0)
        r = reader(fid, dialect)
        yield from r


def csv2df(filename, *args, num_chars=None, **kwargs):
    """This function is deprecated, use read_dataframe instead."""
    warnings.warn(
        "'csv2df' was renamed to 'read_dataframe' in version "
        "0.6.3 and will be removed in 0.7.0.",
        FutureWarning,
        # Attribute the warning to the caller, not to this shim.
        stacklevel=2,
    )
    return read_dataframe(filename, *args, num_chars=num_chars, **kwargs)


def read_dataframe(filename, *args, num_chars=None, **kwargs):
    """Read a CSV file to a Pandas dataframe

    This function uses CleverCSV to detect the dialect, and then passes this
    to the ``read_csv`` function in pandas. Additional arguments and keyword
    arguments are passed to ``read_csv`` as well.

    Parameters
    ----------
    filename: str
        The filename of the CSV file. At the moment, only local files are
        supported.

    *args:
        Additional arguments for the ``pandas.read_csv`` function.

    num_chars: int
        Number of characters to use for dialect detection. If None, use the
        entire file.

        Note that using less than the entire file will speed up detection,
        but can reduce the accuracy of the detected dialect.

    **kwargs:
        Additional keyword arguments for the ``pandas.read_csv`` function.
        You can specify the file encoding here if needed, and it will be
        used during dialect detection.

    Returns
    -------
    df: pandas.DataFrame
        The dataframe returned by ``pandas.read_csv``.

    Raises
    ------
    ValueError
        When ``filename`` is not an existing regular file.

    NoDetectionResult
        When the dialect detection fails.

    """
    # isfile() is False for missing paths too, so no separate exists() check.
    if not os.path.isfile(filename):
        raise ValueError("Filename must be a regular file")
    pd = import_optional_dependency("pandas")

    # Use provided encoding or detect it, and record it for pandas
    enc = kwargs.get("encoding") or get_encoding(filename)
    kwargs["encoding"] = enc

    with open(filename, "r", newline="", encoding=enc) as fid:
        data = fid.read(num_chars) if num_chars else fid.read()
        dialect = Detector().detect(data)
    # Without this guard a failed detection surfaces as an AttributeError
    # on ``None.to_csv_dialect()``.
    if dialect is None:
        raise NoDetectionResult()
    csv_dialect = dialect.to_csv_dialect()

    # This is used to catch pandas' warnings when a dialect is supplied.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="^Conflicting values for .*",
            category=pd.errors.ParserWarning,
        )
        df = pd.read_csv(filename, *args, dialect=csv_dialect, **kwargs)
    return df


def detect_dialect(
    filename, num_chars=None, encoding=None, verbose=False, method="auto"
):
    """Detect the dialect of a CSV file

    This is a utility function that simply returns the detected dialect of
    a given CSV file.

    Parameters
    ----------
    filename : str
        The filename of the CSV file.

    num_chars : int
        Number of characters to read for the detection. If None, the entire
        file will be read. Note that limiting the number of characters can
        reduce the accuracy of the detected dialect.

    encoding : str
        The file encoding of the CSV file. If None, it is detected.

    verbose : bool
        Enable verbose mode during detection.

    method : str
        Dialect detection method to use. Either 'normal' for normal form
        detection, 'consistency' for the consistency measure, or 'auto' for
        first normal and then consistency.

    Returns
    -------
    dialect : SimpleDialect
        The detected dialect as a :class:`SimpleDialect`, or None if
        detection failed.

    """
    enc = encoding or get_encoding(filename)
    with open(filename, "r", newline="", encoding=enc) as fp:
        data = fp.read(num_chars) if num_chars else fp.read()
    dialect = Detector().detect(data, verbose=verbose, method=method)
    return dialect


def write_table(
    table, filename, dialect="excel", transpose=False, encoding=None
):
    """Write a table (a list of lists) to a file

    This is a convenience function for writing a table to a CSV file.

    Parameters
    ----------
    table : list
        A table as a list of lists. The table must have the same number of
        cells in each row (taking the :attr:`transpose` flag into account).

    filename : str
        The filename of the CSV file to write the table to.

    dialect : SimpleDialect or csv.Dialect
        The dialect to use.

    transpose : bool
        Transpose the table before writing.

    encoding : str
        Encoding to use when writing the file. If None, the default of the
        builtin ``open`` function is used.

    Raises
    ------
    ValueError:
        When the length of the rows is not constant.

    """
    if transpose:
        table = list(map(list, zip(*table)))

    if len(set(map(len, table))) > 1:
        raise ValueError("Table doesn't have constant row length.")

    with open(filename, "w", newline="", encoding=encoding) as fp:
        w = writer(fp, dialect=dialect)
        w.writerows(table)