# Python source code of config

# coding=utf-8
# Copyright 2018 The Gin-Config Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides a parser for Gin configuration files."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import ast
import collections
import io
import re
import tokenize

from gin import selector_map
from gin import utils

import six

# A regular expression matching a valid module identifier. A valid module
# identifier consists of one or more valid identifiers (see below), separated by
# periods (as in a Python module).
MODULE_RE = selector_map.SELECTOR_RE
# A regular expression matching valid identifiers. A valid identifier consists
# of a string beginning with an alphabet character or underscore, followed by
# any number of alphanumeric (or underscore) characters, as in Python.
IDENTIFIER_RE = re.compile(r'^[a-zA-Z_]\w*$')


# A `__metaclass__ = abc.ABCMeta` class attribute is honored only by Python 2;
# Python 3 silently ignores it, which leaves `@abc.abstractmethod` unenforced.
# Deriving from a base class that was itself created by `abc.ABCMeta` applies
# the metaclass under both Python 2 and Python 3.
_ParserDelegateBase = abc.ABCMeta('_ParserDelegateBase', (object,), {})


class ParserDelegate(_ParserDelegateBase):
  """A delegate object used to handle certain operations while parsing.

  This is an abstract base class: subclasses must implement both
  `configurable_reference` and `macro` before they can be instantiated.
  """

  @abc.abstractmethod
  def configurable_reference(self, scoped_configurable_name, evaluate):
    """Called to construct an object representing a configurable reference.

    Args:
      scoped_configurable_name: The name of the configurable reference,
        including all scopes.
      evaluate: Whether the configurable reference should be evaluated.

    Returns:
      Should return an object representing the configurable reference.
    """

  @abc.abstractmethod
  def macro(self, macro_name):
    """Called to construct an object representing a macro.

    Args:
      macro_name: The name of the macro, including all scopes.

    Returns:
      Should return an object representing the macro.
    """


class BindingStatement(
    collections.namedtuple(
        'BindingStatement',
        ('scope', 'selector', 'arg_name', 'value', 'location'))):
  """A parsed parameter binding statement.

  Fields:
    scope: The scope portion of the binding key.
    selector: The selector portion of the binding key.
    arg_name: The name of the argument being bound.
    value: The parsed value being bound.
    location: The location of the statement in the parsed input.
  """


class ImportStatement(
    collections.namedtuple('ImportStatement', ('module', 'location'))):
  """A parsed `import` statement.

  Fields:
    module: The module selector that follows the `import` keyword.
    location: The location of the statement in the parsed input.
  """


class IncludeStatement(
    collections.namedtuple('IncludeStatement', ('filename', 'location'))):
  """A parsed `include` statement.

  Fields:
    filename: The file path string that follows the `include` keyword.
    location: The location of the statement in the parsed input.
  """


class ConfigParser(object):
  """A parser for configuration files/strings.

  This class implements a recursive descent parser for (sequences of) parameter
  binding strings. Each parameter binding string has the form:

      maybe/some/scope/configurable_name.parameter_name = value

  The `value` above may be any legal Python literal (list, tuple, dict, string,
  number, boolean, or None). Additionally, a "configurable reference" literal is
  supported, with the syntax

      @maybe/some/scope/configurable_name

  or

      @maybe/some/scope/configurable_name()

  The first form represents a reference to the configurable function itself,
  whereas the second form represents the result of calling the configurable
  function.

  This class doesn't assume a specific type for configurable function
  references, and instead allows the type to be specified at construction time.

  The primary method that should be called is `parse_statement`, which parses
  one statement (a parameter binding, an `import`, or an `include`).
  Additionally, a `parse_value` method is provided which just parses a literal
  value.

  This class implements the iterator interface, which makes it easy to iterate
  over multiple parameter bindings (newline separated) in a given file/string.

  Example:

      class DummyParserDelegate(config_parser.ParserDelegate):
        def configurable_reference(self, scoped_configurable_name, evaluate):
          return (scoped_configurable_name, evaluate)

        def macro(self, macro_name):
          return macro_name

      config = {}
      with open('/path/to/file.config') as f:
        parser = config_parser.ConfigParser(f, DummyParserDelegate())
        for statement in parser:
          if isinstance(statement, config_parser.BindingStatement):
            key = (statement.scope, statement.selector)
            config.setdefault(key, {})[statement.arg_name] = statement.value
  """

  # Field names of `Token`, in the order they are passed to its constructor.
  _TOKEN_FIELDS = ['kind', 'value', 'begin', 'end', 'line']

  class Token(collections.namedtuple('Token', _TOKEN_FIELDS)):
    """A single token taken from the tokenizer, as a five-field namedtuple."""

    @property
    def line_number(self):
      """The first component of `begin` (the line the token starts on)."""
      start_position = self.begin
      return start_position[0]

  def __init__(self, string_or_filelike, parser_delegate):
    """Construct the parser.

    Args:
      string_or_filelike: Either the string to parse, or a file-like object
        supporting the readline method.
      parser_delegate: An instance of the ParserDelegate class, that will be
        responsible for constructing appropriate objects for configurable
        references and macros.
    """
    if hasattr(string_or_filelike, 'readline'):
      line_reader = string_or_filelike.readline
    else:  # Assume it's string-like.
      if six.PY2:
        string_or_filelike = unicode(string_or_filelike)
      string_io = io.StringIO(string_or_filelike)
      line_reader = string_io.readline

    def _text_line_reader():
      line = line_reader()
      if isinstance(line, bytes):
        line = line.decode('utf8')
      return line

    self._token_generator = tokenize.generate_tokens(_text_line_reader)
    self._filename = getattr(string_or_filelike, 'name', None)
    self._current_token = None
    self._delegate = parser_delegate
    self._advance_one_token()

  def __iter__(self):
    return self

  def __next__(self):
    return self.next()

  @property
  def current_token(self):
    return self._current_token

  def next(self):
    statement = self.parse_statement()
    if statement:
      return statement
    raise StopIteration

  def parse_statement(self):
    """Parse a single statement.

    Returns:
      Either a `BindingStatement`, `ImportStatement`, `IncludeStatement`, or
      `None` if no more statements can be parsed (EOF reached).
    """
    self._skip_whitespace_and_comments()
    if self._current_token.kind == tokenize.ENDMARKER:
      return None

    # Save off location, but ignore char_num for any statement-level errors.
    stmt_loc = self._current_location(ignore_char_num=True)
    binding_key_or_keyword = self._parse_selector()
    statement = None
    if self._current_token.value != '=':
      if binding_key_or_keyword == 'import':
        module = self._parse_selector(scoped=False)
        statement = ImportStatement(module, stmt_loc)
      elif binding_key_or_keyword == 'include':
        str_loc = self._current_location()
        success, filename = self._maybe_parse_basic_type()
        if not success or not isinstance(filename, str):
          self._raise_syntax_error('Expected file path as string.', str_loc)
        statement = IncludeStatement(filename, stmt_loc)
      else:
        self._raise_syntax_error("Expected '='.")
    else:  # We saw an '='.
      self._advance_one_token()
      value = self.parse_value()
      scope, selector, arg_name = parse_binding_key(binding_key_or_keyword)
      statement = BindingStatement(scope, selector, arg_name, value, stmt_loc)

    assert statement, 'Internal parsing error.'

    if (self._current_token.kind != tokenize.NEWLINE and
        self._current_token.kind != tokenize.ENDMARKER):
      self._raise_syntax_error('Expected newline.')
    elif self._current_token.kind == tokenize.NEWLINE:
      self._advance_one_token()

    return statement

  def parse_value(self):
    """Parse a single literal value.

    Returns:
      The parsed value.
    """
    parsers = [
        self._maybe_parse_container, self._maybe_parse_basic_type,
        self._maybe_parse_configurable_reference, self._maybe_parse_macro
    ]
    for parser in parsers:
      success, value = parser()
      if success:
        return value
    self._raise_syntax_error('Unable to parse value.')

  def _advance_one_token(self):
    self._current_token = ConfigParser.Token(*next(self._token_generator))
    # Certain symbols (e.g., "$") cause ERRORTOKENs on all preceding space
    # characters. Find the first non-space or non-ERRORTOKEN token.
    while (self._current_token.kind == tokenize.ERRORTOKEN and
           self._current_token.value in ' \t'):
      self._current_token = ConfigParser.Token(*next(self._token_generator))

  def advance_one_line(self):
    """Advances to next line."""

    current_line = self._current_token.line_number
    while current_line == self._current_token.line_number:
      self._current_token = ConfigParser.Token(*next(self._token_generator))

  def _skip_whitespace_and_comments(self):
    skippable_token_kinds = [
        tokenize.COMMENT, tokenize.NL, tokenize.INDENT, tokenize.DEDENT
    ]
    while self._current_token.kind in skippable_token_kinds:
      self._advance_one_token()

  def _advance(self):
    self._advance_one_token()
    self._skip_whitespace_and_comments()

  def _current_location(self, ignore_char_num=False):
    line_num, char_num = self._current_token.begin
    if ignore_char_num:
      char_num = None
    return (self._filename, line_num, char_num, self._current_token.line)

  def _raise_syntax_error(self, msg, location=None):
    if not location:
      location = self._current_location()
    raise SyntaxError(msg, location)

  def _parse_dict_item(self):
    key = self.parse_value()
    if self._current_token.value != ':':
      self._raise_syntax_error("Expected ':'.")
    self._advance()
    value = self.parse_value()
    return key, value

  def _parse_selector(self, scoped=True, allow_periods_in_scope=False):
    """Parse a (possibly scoped) selector.

    A selector is a sequence of one or more valid Python-style identifiers
    separated by periods (see also `SelectorMap`). A scoped selector is a
    selector that may be preceded by scope names (separated by slashes).

    Args:
      scoped: Whether scopes are allowed.
      allow_periods_in_scope: Whether to allow period characters in the scope
        names preceding the selector.

    Returns:
      The parsed selector (as a string).

    Raises:
      SyntaxError: If the scope or selector is malformatted.
    """
    if self._current_token.kind != tokenize.NAME:
      self._raise_syntax_error('Unexpected token.')

    begin_line_num = self._current_token.begin[0]
    begin_char_num = self._current_token.begin[1]
    end_char_num = self._current_token.end[1]
    line = self._current_token.line

    selector_parts = []
    # This accepts an alternating sequence of NAME and '/' or '.' tokens.
    step_parity = 0
    while (step_parity == 0 and self._current_token.kind == tokenize.NAME or
           step_parity == 1 and self._current_token.value in ('/', '.')):
      selector_parts.append(self._current_token.value)
      step_parity = not step_parity
      end_char_num = self._current_token.end[1]
      self._advance_one_token()
    self._skip_whitespace_and_comments()

    # Due to tokenization, most whitespace has been stripped already. To prevent
    # whitespace inside the scoped selector, we verify that it matches an
    # untokenized version of the selector obtained from the first through last
    # character positions of the consumed tokens in the line being parsed.
    scoped_selector = ''.join(selector_parts)
    untokenized_scoped_selector = line[begin_char_num:end_char_num]
    # Also check that it's properly formatted (e.g., no consecutive slashes).
    scope_re = IDENTIFIER_RE
    if allow_periods_in_scope:
      scope_re = MODULE_RE
    selector_re = MODULE_RE

    scope_parts = scoped_selector.split('/')
    valid_format = all(scope_re.match(scope) for scope in scope_parts[:-1])
    valid_format &= bool(selector_re.match(scope_parts[-1]))
    valid_format &= bool(scoped or len(scope_parts) == 1)
    if untokenized_scoped_selector != scoped_selector or not valid_format:
      location = (self._filename, begin_line_num, begin_char_num + 1, line)
      self._raise_syntax_error('Malformatted scope or selector.', location)

    return scoped_selector

  def _maybe_parse_container(self):
    """Try to parse a container type (dict, list, or tuple)."""
    bracket_types = {
        '{': ('}', dict, self._parse_dict_item),
        '(': (')', tuple, self.parse_value),
        '[': (']', list, self.parse_value)
    }
    if self._current_token.value in bracket_types:
      open_bracket = self._current_token.value
      close_bracket, type_fn, parse_item = bracket_types[open_bracket]
      self._advance()

      values = []
      saw_comma = False
      while self._current_token.value != close_bracket:
        values.append(parse_item())
        if self._current_token.value == ',':
          saw_comma = True
          self._advance()
        elif self._current_token.value != close_bracket:
          self._raise_syntax_error("Expected ',' or '%s'." % close_bracket)

      # If it's just a single value enclosed in parentheses without a trailing
      # comma, it's not a tuple, so just grab the value.
      if type_fn is tuple and len(values) == 1 and not saw_comma:
        type_fn = lambda x: x[0]

      self._advance()
      return True, type_fn(values)

    return False, None

  def _maybe_parse_basic_type(self):
    """Try to parse a basic type (str, bool, number)."""
    token_value = ''
    # Allow a leading dash to handle negative numbers.
    if self._current_token.value == '-':
      token_value += self._current_token.value
      self._advance()

    basic_type_tokens = [tokenize.NAME, tokenize.NUMBER, tokenize.STRING]
    continue_parsing = self._current_token.kind in basic_type_tokens
    if not continue_parsing:
      return False, None

    while continue_parsing:
      token_value += self._current_token.value

      try:
        value = ast.literal_eval(token_value)
      except Exception as e:  # pylint: disable=broad-except
        err_str = "{}\n    Failed to parse token '{}'"
        self._raise_syntax_error(err_str.format(e, token_value))

      was_string = self._current_token.kind == tokenize.STRING
      self._advance()
      is_string = self._current_token.kind == tokenize.STRING
      continue_parsing = was_string and is_string

    return True, value

  def _maybe_parse_configurable_reference(self):
    """Try to parse a configurable reference (@[scope/name/]fn_name[()])."""
    if self._current_token.value != '@':
      return False, None

    location = self._current_location()
    self._advance_one_token()
    scoped_name = self._parse_selector(allow_periods_in_scope=True)

    evaluate = False
    if self._current_token.value == '(':
      evaluate = True
      self._advance()
      if self._current_token.value != ')':
        self._raise_syntax_error("Expected ')'.")
      self._advance_one_token()
    self._skip_whitespace_and_comments()

    with utils.try_with_location(location):
      reference = self._delegate.configurable_reference(scoped_name, evaluate)

    return True, reference

  def _maybe_parse_macro(self):
    """Try to parse an macro (%scope/name)."""
    if self._current_token.value != '%':
      return False, None

    location = self._current_location()
    self._advance_one_token()
    scoped_name = self._parse_selector(allow_periods_in_scope=True)

    with utils.try_with_location(location):
      macro = self._delegate.macro(scoped_name)

    return True, macro


def parse_scoped_selector(scoped_selector):
  """Split a scoped selector into its scope and selector.

  A macro of the form `%scope/name` is first rewritten to
  `scope/name/macro.value`. The result is then split at the last '/'.

  Args:
    scoped_selector: The scoped selector string, e.g. `a/b/fn.arg` or
      `%scope/name`. An empty string yields an empty scope and selector.

  Returns:
    A `(scope, selector)` tuple of strings; `scope` is empty if
    `scoped_selector` contains no '/'.

  Raises:
    ValueError: If `scoped_selector` starts with '%' and ends with `.value`.
  """
  # Convert macro (%scope/name) to (scope/name/macro.value).
  # `startswith` (rather than indexing) keeps an empty string from raising.
  if scoped_selector.startswith('%'):
    if scoped_selector.endswith('.value'):
      err_str = '{} is invalid cannot use % and end with .value'
      raise ValueError(err_str.format(scoped_selector))
    scoped_selector = scoped_selector[1:] + '/macro.value'
  scope, _, selector = scoped_selector.rpartition('/')
  return scope, selector


def parse_binding_key(binding_key):
  """Split a binding key into its scope, selector and argument name.

  The key is first split into scope and selector by `parse_scoped_selector`;
  the selector is then split at its last '.' into the selector proper and the
  argument name.

  Args:
    binding_key: The binding key string, e.g. `scope/selector.arg_name`.

  Returns:
    A `(scope, selector, arg_name)` tuple of strings; `arg_name` is empty if
    the selector contains no '.'.
  """
  scope, full_selector = parse_scoped_selector(binding_key)
  head, dot, tail = full_selector.rpartition('.')
  if not dot:
    # No period at all: everything is the selector.
    return scope, tail, ''
  return scope, head, tail