"""Basic functionality."""

import contextlib
import functools
import inspect
import multiprocessing.pool
import re
import sys
import time
import traceback
from typing import (
    Any, Callable, Container, Dict, Hashable, Iterable, Iterator, List, Mapping, Optional, Set, Sequence, Type, Tuple,
    Union
)

import numpy as np

from .. import options


# define common types
Array = Any
RecArray = Any
Data = Dict[str, Array]
Options = Dict[str, Any]
Bounds = Tuple[Array, Array]

# define a pool managed by parallel and used by generate_items
pool = None


@contextlib.contextmanager
def parallel(processes: int) -> Iterator[None]:
    r"""Context manager used for parallel processing in a ``with`` statement context.

    This manager creates a context in which a pool of Python processes will be used by any method that requires
    market-by-market computation. These methods will distribute their work among the processes. After the context
    created by the ``with`` statement ends, all worker processes in the pool will be terminated. Outside of this
    context, such methods will not use multiprocessing.

    Importantly, multiprocessing will only improve speed if gains from parallelization outweigh overhead from
    serializing and passing data between processes. For example, if computation for a single market is very fast and
    there is a lot of data in each market that must be serialized and passed between processes, using multiprocessing
    may reduce overall speed.

    Arguments
    ---------
    processes : `int`
        Number of Python processes that will be created and used by any method that supports parallel processing.

    Examples
    --------
    .. raw:: latex

       \begin{examplenotebook}

    .. toctree::

       /_notebooks/api/parallel.ipynb

    .. raw:: latex

       \end{examplenotebook}

    """

    # validate the number of processes
    if not isinstance(processes, int):
        raise TypeError("processes must be an int.")
    if processes < 2:
        raise ValueError("processes must be at least 2.")

    # start the process pool, wait for work to be done, and then terminate it
    output(f"Starting a pool of {processes} processes ...")
    start_time = time.time()
    global pool

    # the termination time stays None if the pool never started, so that the cleanup below never reads an unbound name
    # and never masks the exception that is actually propagating
    terminate_time: Optional[float] = None
    try:
        with multiprocessing.pool.Pool(processes) as pool:
            output(f"Started the process pool after {format_seconds(time.time() - start_time)}.")
            try:
                yield
            finally:
                # record when termination begins even if the body of the with statement raised an exception
                output(f"Terminating the pool of {processes} processes ...")
                terminate_time = time.time()
    finally:
        pool = None
        if terminate_time is not None:
            output(f"Terminated the process pool after {format_seconds(time.time() - terminate_time)}.")


def generate_items(keys: Iterable, factory: Callable[[Any], tuple], method: Callable) -> Iterator:
    """Generate (key, method(*factory(key))) tuples for each key.

    The first element returned by factory is an instance of the class to which method is attached. If a process pool
    has been initialized, use multiprocessing; otherwise, use serial processing. In both cases, factory is evaluated
    lazily in the calling process. With a pool, items are generated with imap_unordered, so they are not guaranteed to
    arrive in the same order as the keys.
    """
    if pool is None:
        return (generate_items_worker((k, factory(k), method)) for k in keys)
    return pool.imap_unordered(generate_items_worker, ((k, factory(k), method) for k in keys))


def generate_items_worker(args: Tuple[Any, tuple, Callable]) -> Tuple[Any, Any]:
    """Call the specified method of a class instance with any additional arguments. Return the associated key along
    with the returned object.
    """
    key, (instance, *method_args), method = args
    return key, method(instance, *method_args)


def structure_matrices(mapping: Mapping) -> RecArray:
    """Structure a mapping of keys to (array or None, type) tuples as a record array in which each sub-array is
    guaranteed to be at least two-dimensional.

    Keys are either field names or two-element tuples in which the second element is the field name (this is the form
    that update_matrices builds for fields that carry a third element in their dtype information). Arrays that are
    None become sub-arrays with zero columns.
    """

    # determine the number of rows in all matrices
    size = next(a.shape[0] for a, _ in mapping.values() if a is not None)

    # collect matrices and data types
    matrices: List[Array] = []
    dtypes: List[Tuple[Union[str, Tuple[Hashable, str]], Any, Tuple[int]]] = []
    for key, (array, dtype) in mapping.items():
        matrix = np.zeros((size, 0)) if array is None else np.c_[array]
        dtypes.append((key, dtype, (matrix.shape[1],)))
        matrices.append(matrix)

    # build the record array (fields with tuple keys are assigned by the name in the second element of the tuple)
    structured = np.recarray(size, dtypes)
    for dtype, matrix in zip(dtypes, matrices):
        structured[dtype[0] if isinstance(dtype[0], str) else dtype[0][1]] = matrix
    return structured


def update_matrices(matrices: RecArray, update_mapping: Dict) -> RecArray:
    """Update fields in a record array created by structure_matrices by re-structuring the matrices.

    Fields that are not keys in the update mapping are carried over unchanged. The update mapping itself is copied and
    not modified.
    """
    mapping = update_mapping.copy()
    for key in matrices.dtype.names:
        if key not in mapping:
            # fields with more than two elements of dtype information are re-keyed by (third element, name)
            if len(matrices.dtype.fields[key]) > 2:
                mapping[(matrices.dtype.fields[key][2], key)] = (matrices[key], matrices[key].dtype)
            else:
                mapping[key] = (matrices[key], matrices[key].dtype)
    return structure_matrices(mapping)


def extract_matrix(structured_array_like: Mapping, key: Any) -> Optional[Array]:
    """Attempt to extract a field from a structured array-like object or horizontally stack field0, field1, and so on,
    into a full matrix. The extracted array will have at least two dimensions.

    Return None if the field exists but is empty, or if no non-empty numbered fields are found.
    """
    try:
        matrix = np.c_[structured_array_like[key]]
        return matrix if matrix.size > 0 else None
    except Exception:
        # fall back to numbered fields, stopping at the first index that cannot be extracted
        index = 0
        parts: List[Array] = []
        while True:
            try:
                part = np.c_[structured_array_like[f'{key}{index}']]
            except Exception:
                # output a warning if there's a 1 but no 0 (this is a common mistake)
                if index == 0:
                    try:
                        structured_array_like[f'{key}{index + 1}']
                    except Exception:
                        pass
                    else:
                        output("")
                        output(f"Warning: '{key}{index + 1}' was specified but not '{key}{index}'.")
                        output("")
                break
            index += 1
            if part.size > 0:
                parts.append(part)
        return np.hstack(parts) if parts else None


def extract_size(structured_array_like: Mapping) -> int:
    """Attempt to extract the number of rows from a structured array-like object.

    Several strategies are tried in order and the first one that does not raise an exception is used. A TypeError is
    raised if every strategy fails or if the extracted size is not positive.
    """
    size = 0
    getters = [
        lambda m: m.shape[0],
        lambda m: next(iter(m.values())).shape[0],
        lambda m: len(next(iter(m.values()))),
        lambda m: len(m)
    ]
    for get in getters:
        try:
            size = get(structured_array_like)
            break
        except Exception:
            pass
    if size > 0:
        return size
    raise TypeError(
        f"Failed to get the number of rows in the structured array-like object of type {type(structured_array_like)}. "
        "Try using a dictionary, a NumPy structured array, a Pandas DataFrame, or any other standard type."
    )


def interact_ids(*columns: Array) -> Array:
    """Create interactions of ID columns.

    The result is a flat object array. With a single column it contains that column's values; with multiple columns
    each element is a tuple built by zipping the columns together.
    """
    # the builtin object is used because the np.object alias was removed from NumPy
    interacted = columns[0].flatten().astype(object)
    if len(columns) > 1:
        interacted[:] = list(zip(*columns))
    return interacted


def output(message: Any) -> None:
    """Print a message if verbosity is turned on.

    The message is converted to a string and passed to options.verbose_output, which must be callable. Standard output
    is flushed afterwards if options.flush_output is set.
    """
    if options.verbose:
        if not callable(options.verbose_output):
            raise TypeError("options.verbose_output should be callable.")
        options.verbose_output(str(message))
        if options.flush_output:
            sys.stdout.flush()


def output_progress(iterable: Iterable, length: int, start_time: float) -> Iterator:
    """Yield results from an iterable while outputting progress updates at most every minute."""
    elapsed = time.time() - start_time
    next_minute = int(elapsed / 60) + 1
    for index, iterated in enumerate(iterable):
        yield iterated

        # only report once the next whole minute since the start time has passed
        elapsed = time.time() - start_time
        if elapsed > 60 * next_minute:
            output(f"Finished {index + 1} out of {length} after {format_seconds(elapsed)}.")
            next_minute = int(elapsed / 60) + 1


def format_seconds(seconds: float) -> str:
    """Prepare a number of seconds to be displayed as a string of the form HH:MM:SS."""
    hours, remainder = divmod(int(round(seconds)), 60**2)
    minutes, seconds = divmod(remainder, 60)
    return f'{hours:02}:{minutes:02}:{seconds:02}'


def format_number(number: Any) -> str:
    """Prepare a number to be displayed as a string.

    The number is converted to a float and formatted in centered, signed scientific notation with a width and precision
    determined by options.digits, which must be an int. The sign in front of a NaN is replaced with a space.
    """
    if not isinstance(options.digits, int):
        raise TypeError("options.digits must be an int.")
    template = f"{{:^+{options.digits + 6}.{options.digits - 1}E}}"
    formatted = template.format(float(number))
    if "NAN" in formatted:
        formatted = formatted.replace("+", " ")
    return formatted


def format_se(se: Any) -> str:
    """Prepare a standard error to be displayed as a string.

    The formatted number is wrapped in parentheses. For NaN and infinite values, only the NAN or INF token is wrapped
    so that any surrounding padding is kept outside of the parentheses.
    """
    formatted = format_number(se)
    for string in ["NAN", "-INF", "+INF"]:
        if string in formatted:
            return formatted.replace(string, f"({string})")
    return f"({formatted})"


def format_options(mapping: Options) -> str:
    """Prepare a mapping of options to be displayed as a string.

    Callable values are displayed by their qualified names and floats are displayed with format_number.
    """
    strings: List[str] = []
    for key, value in mapping.items():
        if callable(value):
            value = f'{value.__module__}.{value.__qualname__}'
        elif isinstance(value, float):
            value = format_number(value)
        strings.append(f'{key}: {value}')
    joined = ', '.join(strings)
    return f'{{{joined}}}'


def format_table(
        header: Sequence, *data: Sequence, title: Optional[str] = None, include_border: bool = True,
        include_header: bool = True, line_indices: Container[int] = ()) -> str:
    """Format table information as a string, which has fixed widths, vertical lines after any specified indices, and
    optionally a title, border, and header.

    Each header element is either a string or a sequence of strings. Sequences define multi-row headers that are
    aligned at the bottom. Data rows shorter than the header are padded with empty cells.
    """

    # construct the header rows (rows are read from the bottom up until a row is completely empty)
    row_index = -1
    header_rows: List[List[str]] = []
    header = [[c] if isinstance(c, str) else c for c in header]
    while True:
        header_row = ["" if len(c) < -row_index else c[row_index] for c in header]
        if not any(header_row):
            break
        header_rows.insert(0, header_row)
        row_index -= 1

    # construct the data rows
    data_rows = [[str(c) for c in r] + [""] * (len(header) - len(r)) for r in data]

    # compute column widths
    all_rows = header_rows + data_rows
    widths = [max(len(r[column_index]) for r in all_rows) for column_index in range(len(header))]

    # build the template
    # NOTE(review): the separator literals are reproduced exactly as they appear in the flattened source, in which
    #   runs of whitespace may have been collapsed; confirm the intended column spacing
    template = " ".join("{{:^{}}}{}".format(w, " |" if i in line_indices else "") for i, w in enumerate(widths))

    # the border is as wide as a row of empty cells
    border = "=" * len(template.format(*[""] * len(widths)))

    # build the table
    lines = []
    if title is not None:
        lines.append(f"{title}:")
    if include_border:
        lines.append(border)
    if include_header:
        lines.extend([template.format(*r) for r in header_rows])
        lines.append(template.format(*("-" * w for w in widths)))
    lines.extend([template.format(*r) for r in data_rows])
    if include_border:
        lines.append(border)
    return "\n".join(lines)


def get_indices(ids: Array) -> Dict[Hashable, Array]:
    """From a one-dimensional array input, construct a dictionary with keys that are the unique values of the array
    and values that are the indices where the key appears in the array.
    """
    flat = ids.flatten()

    # a stable sort keeps the indices for each key in their original order
    sort_indices = flat.argsort(kind='mergesort')
    sorted_ids = flat[sort_indices]

    # mark the first position of each run of equal IDs (the builtin bool replaces the removed np.bool alias)
    changes = np.ones(flat.shape, bool)
    changes[1:] = sorted_ids[1:] != sorted_ids[:-1]
    reduce_indices = np.nonzero(changes)[0]

    # splitting at the first index (zero) yields an empty leading piece, which is dropped
    return dict(zip(sorted_ids[reduce_indices], np.split(sort_indices, reduce_indices)[1:]))


def compute_finite_differences(f: Callable[[Array], Array], x: Array, epsilon_scale: float = 1.0) -> Array:
    """Approximate a matrix of derivatives with finite differences.

    Central differences are used: each element of x is perturbed by half of the step size in both directions, where the
    step size is options.finite_differences_epsilon scaled by epsilon_scale. Each perturbed element contributes one
    column to the returned matrix.
    """
    epsilon = epsilon_scale * options.finite_differences_epsilon
    columns = []
    for index in range(x.size):
        x1 = x.copy()
        x2 = x.copy()
        x1[index] += epsilon / 2
        x2[index] -= epsilon / 2
        columns.append((f(x1) - f(x2)) / epsilon)
    return np.column_stack(columns)


class SolverStats(object):
    """Structured statistics returned by a generic numerical solver."""

    converged: bool
    iterations: int
    evaluations: int

    def __init__(self, converged: bool = True, iterations: int = 0, evaluations: int = 0) -> None:
        """Structure the statistics."""
        self.converged = converged
        self.iterations = iterations
        self.evaluations = evaluations


class StringRepresentation(object):
    """Object that defers to its string representation."""

    def __repr__(self) -> str:
        """Defer to the string representation."""
        return str(self)


class Groups(object):
    """Computation of grouped statistics."""

    sort_indices: Array  # indices that sort the flattened IDs
    reduce_indices: Array  # positions in the sorted IDs at which each group starts
    unique: Array  # unique IDs in sorted order
    codes: Array  # for each original observation, the index of its group in unique
    counts: Array  # number of observations in each group
    group_count: int  # number of groups

    def __init__(self, ids: Array) -> None:
        """Sort and index IDs that define groups."""

        # sort the IDs
        flat = ids.flatten()
        self.sort_indices = flat.argsort()
        sorted_ids = flat[self.sort_indices]

        # identify groups (the builtin bool replaces the removed np.bool alias)
        changes = np.ones(flat.shape, bool)
        changes[1:] = sorted_ids[1:] != sorted_ids[:-1]
        self.reduce_indices = np.nonzero(changes)[0]
        self.unique = sorted_ids[self.reduce_indices]

        # encode the groups (sorting the sort indices gives the permutation that undoes the sort)
        sorted_codes = np.cumsum(changes) - 1
        self.codes = sorted_codes[self.sort_indices.argsort()]

        # compute counts
        self.group_count = self.reduce_indices.size
        self.counts = np.diff(np.append(self.reduce_indices, self.codes.size))

    def sum(self, matrix: Array) -> Array:
        """Compute the sum of each group."""
        return np.add.reduceat(matrix[self.sort_indices], self.reduce_indices)

    def mean(self, matrix: Array) -> Array:
        """Compute the mean of each group."""
        return self.sum(matrix) / self.counts[:, None]

    def expand(self, statistics: Array) -> Array:
        """Expand statistics for each group to the size of the original matrix."""
        return statistics[self.codes]


# NOTE: the class docstrings of Error and of its subclasses are parsed by Error.__str__ to build error messages, so
#   they are runtime text and should only be edited when the message itself is meant to change
class Error(Exception):
    """Errors that are indistinguishable from others with the same message, which is parsed from the docstring."""

    stack: Optional[str]

    def __init__(self) -> None:
        """Optionally store the full current traceback for debugging purposes."""
        if options.verbose_tracebacks:
            self.stack = ''.join(traceback.format_stack())
        else:
            self.stack = None

    def __eq__(self, other: Any) -> bool:
        """Defer to hashes. Objects that cannot be hashed are never equal to an error."""
        try:
            return hash(self) == hash(other)
        except TypeError:
            return False

    def __hash__(self) -> int:
        """Hash this instance such that in collections it is indistinguishable from others with the same message."""
        return hash((type(self).__name__, str(self)))

    def __repr__(self) -> str:
        """Defer to the string representation."""
        return str(self)

    def __str__(self) -> str:
        """Replace docstring markdown with simple text."""
        doc = inspect.getdoc(self)
        assert doc is not None

        # normalize LaTeX
        while True:
            match = re.search(r':math:`([^`]+)`', doc)
            if match is None:
                break
            start, end = match.span()
            doc = doc[:start] + re.sub(r'\s+', ' ', re.sub(r'[\\{}]', ' ', match.group(1))).lower() + doc[end:]

        # normalize references
        while True:
            match = re.search(r':ref:`[a-zA-Z0-9]+:([^`]+)`', doc)
            if match is None:
                break
            start, end = match.span()
            doc = doc[:start] + re.sub(r'<[^>]+>', '', match.group(1)) + doc[end:]

        # remove all remaining domains and compress whitespace
        doc = re.sub(r'[\s\n]+', ' ', re.sub(r':[a-z\-]+:|`', '', doc))

        # optionally add the full traceback
        if self.stack is not None:
            doc = f"{doc} Traceback:\n\n{self.stack}\n"
        return doc


class DerivedError(Error):
    """Error derived from another exception."""

    _exception: Exception

    def __init__(self, exception: Exception) -> None:
        """Store the exception from which this error is derived."""
        super().__init__()
        self._exception = exception

    def __str__(self) -> str:
        """Supplement the error with the exception's message."""
        return f"{super().__str__()} Exception encountered: '{self._exception}'."


class NumericalError(Error):
    """Floating point issues."""

    _messages: Set[str]

    def __init__(self) -> None:
        """Start with an empty set of messages, which are added by whatever detects the floating point issues."""
        super().__init__()
        self._messages: Set[str] = set()

    def __str__(self) -> str:
        """Supplement the error with the messages."""
        combined = ", ".join(sorted(self._messages))
        return f"{super().__str__()} Errors encountered: {combined}."


class MultipleReversionError(Error):
    """Reversion of problematic elements."""

    _bad: int
    _total: int

    def __init__(self, bad_indices: Array) -> None:
        """Store element counts."""
        super().__init__()
        self._bad = bad_indices.sum()
        self._total = bad_indices.size

    def __str__(self) -> str:
        """Supplement the error with the counts."""
        return f"{super().__str__()} Number of reverted elements: {self._bad} out of {self._total}."


class InversionError(Error):
    """Problems with inverting a matrix."""

    _condition: float

    def __init__(self, matrix: Array) -> None:
        """Compute condition number of the matrix."""
        super().__init__()

        # imported locally rather than at the top of the module (presumably to avoid a circular import)
        from .algebra import compute_condition_number
        self._condition = compute_condition_number(matrix)

    def __str__(self) -> str:
        """Supplement the error with the condition number."""
        return f"{super().__str__()} Condition number: {format_number(self._condition)}."
# NOTE: the class docstring below is parsed by the inherited __str__ to build the error message, so it is runtime text
class InversionReplacementError(InversionError):
    """Problems with inverting a matrix led to the use of a replacement such as an approximation."""

    _replacement: str

    def __init__(self, matrix: Array, replacement: str) -> None:
        """Store the replacement description."""
        super().__init__(matrix)
        self._replacement = replacement

    def __str__(self) -> str:
        """Supplement the error with the description."""
        return f"{super().__str__()} The inverse was replaced with {self._replacement}."


class NumericalErrorHandler(object):
    """Decorator that appends errors to a function's returned list when numerical errors are encountered."""

    error: Type[NumericalError]

    def __init__(self, error: Type[NumericalError]) -> None:
        """Store the error class."""
        self.error = error

    def __call__(self, decorated: Callable) -> Callable:
        """Decorate the function.

        The last element returned by the decorated function must support append because any detected error is appended
        to it.
        """

        @functools.wraps(decorated)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            """Configure NumPy to detect numerical errors."""
            detector = NumericalErrorDetector(self.error)

            # the detector is installed through errstate so that the previous error callback is restored on exit,
            # instead of leaving this call's detector installed after the decorated function returns
            with np.errstate(divide='call', over='call', under='ignore', invalid='call', call=detector):
                returned = decorated(*args, **kwargs)
            if detector.detected is not None:
                returned[-1].append(detector.detected)
            return returned

        return wrapper


class NumericalErrorDetector(object):
    """Error detector to be passed to NumPy's error call function."""

    error: Type[NumericalError]
    detected: Optional[NumericalError]

    def __init__(self, error: Type[NumericalError]) -> None:
        """By default no error is detected."""
        self.error = error
        self.detected = None

    def __call__(self, message: str, _: int) -> None:
        """Initialize the error and store the error message.

        A single error instance is created on the first call and the messages from all calls are collected in it.
        """
        if self.detected is None:
            self.detected = self.error()
        self.detected._messages.add(message)