# Python source code of regex

__author__    = "Radical.Utils Development Team (Andre Merzky)"
__copyright__ = "Copyright 2013, RADICAL@Rutgers"
__license__   = "MIT"


import re


# ------------------------------------------------------------------------------
#
class ReSult (object) :
    """
    This class is a container around a regular expression match, which provides
    some more convenient access methods, boolean tests, etc.

    The captured groups can be accessed by position (`res[0]`), by group name
    (`res['x']`) or as attributes (`res.x`).  Instances are iterable over the
    captured groups, can be compared against other iterables (or against
    a single string), and can be used as context managers.

    We only handle base strings, not unicode strings!
    """

    # The match type has no stable public name across Python versions, so we
    # derive it once from an actual (empty) match.
    _MATCH_TYPE = type (re.match ("", ""))


    # --------------------------------------------------------------------------
    #
    def __init__ (self, result=None) :
        """
        construct with a `re.MatchObject` instance.  This ctor should only be
        called from within the `ReString` class.

        A false `result` (e.g. `None`, as returned by a failed `re.search`)
        creates an empty, false `ReSult`.  Any other value which is not
        a match object raises a `TypeError`.
        """
        self._glist   = list()   # captured groups, in pattern order
        self._gdict   = dict()   # named groups only
        self._matched = False    # True if constructed from a real match

        if  result :
            if  not isinstance (result, self._MATCH_TYPE) :
                raise TypeError ('Need re.MatchObject on construction, not %s' % type(result))

            self._glist   = result.groups ()
            self._gdict   = result.groupdict ()
            self._matched = True


    # --------------------------------------------------------------------------
    #
    def __str__ (self) :
        """
        The string representation is based on the match *list*, as the dict may
        not include all matches...
        """
        return str(self._glist)


    # --------------------------------------------------------------------------
    #
    def __len__ (self) :
        """
        The len representation is based on the match *list*, as the dict may
        not include all matches...
        """
        return len(self._glist)


    # --------------------------------------------------------------------------
    #
    def get (self, key, default=None) :
        """
        dict-like access to *named* groups: return the group named `key`, or
        `default` if no such group exists.  Raises a `TypeError` if `key` is
        not a string.
        """

        if  not isinstance (key, str) :
            raise TypeError ("key %s needs to be string, not %s"
                          % (key, type(key)))

        return self._gdict.get (key, default)


    # --------------------------------------------------------------------------
    #
    def __getitem__ (self, idx) :
        """
        getitem is supported for both array type access (using an int index),
        and for dict type access (using a string name).  All other key types
        will cause a `TypeError`.

        Unknown names and out-of-range indexes (positive or negative) yield
        `None` instead of raising.
        """

        if  isinstance (idx, str) :
            return self._gdict.get (idx)

        if  isinstance (idx, int) :
            # valid negative indexes count from the end, as for any sequence
            if  -len(self) <= idx < len(self) :
                return self._glist[idx]
            return None

        raise TypeError ("index %s needs to be integer or string, not %s"
                      % (idx, type(idx)))


    # --------------------------------------------------------------------------
    #
    def __iter__(self):
        """
        the matches can be iterated over
        """
        return iter (self._glist)


    # --------------------------------------------------------------------------
    #
    def __getattr__ (self, name) :
        """
        Matches can be accessed as properties.

        This is only invoked if normal attribute lookup fails.  Named groups
        are returned by name; other public names evaluate to `None`.  Unknown
        names with a leading underscore raise an `AttributeError`, so that
        protocol lookups (copy, pickle, ...) behave, and so that a lookup of
        `_gdict` on a not yet initialized instance cannot recurse endlessly.
        """
        # go via __dict__: `self._gdict` would re-enter __getattr__ if unset
        gdict = self.__dict__.get ('_gdict', {})

        if  name in gdict :
            return gdict[name]

        if  name.startswith ('_') :
            raise AttributeError (name)

        return None


    # --------------------------------------------------------------------------
    #
    def __nonzero__ (self) :
        """
        Boolean check for 'if / elif / else' constructs: True if the instance
        wraps a successful match (even one without any capture groups) or
        holds at least one group.
        """
        return bool (self._matched or len(self))

    # Python 3 name of the boolean protocol method
    __bool__ = __nonzero__


    # --------------------------------------------------------------------------
    #
    def __enter__ (self) :
        """
        support context manager interface for with-statement based constructs
        """
        return self


    # --------------------------------------------------------------------------
    #
    def __exit__ (self, a, b, c) :
        """
        second part of the context manager interface.  The arguments are the
        exception type, value and traceback (if any); returning None lets any
        exception propagate.
        """
        pass


    # --------------------------------------------------------------------------
    #
    def __cmp__ (self, other) :
        """
        compare to another ReSult or to a tuple.  As they are both iterable, we
        compare based on the iterable interface.

        Returns 0 if both hold equal elements in equal order, and a non-zero
        int otherwise: the length difference if the lengths differ, else -1
        or 1 according to the first differing pair (1 if that pair cannot be
        ordered, e.g. `None` vs. string).  Raises a `TypeError` if `other` is
        neither iterable nor a single string.
        """
        if  isinstance (other, str) :
            # we consider a single string to be an iterable of one element
            other = [other]

        try :
            other_iter = iter (other)
        except TypeError :
            raise TypeError ('compare expected iterable or single string type, not %s' % type (other))

        # materialize, so that len() works for generators, sets etc., too
        theirs = list (other_iter)
        mine   = list (self._glist)

        if  len (mine) != len (theirs) :
            return len (mine) - len (theirs)

        for m, t in zip (mine, theirs) :
            if  m == t :
                continue
            try :
                return -1 if m < t else 1
            except TypeError :
                # unorderable pair: certainly unequal, order is arbitrary
                return 1

        return 0


    # --------------------------------------------------------------------------
    #
    # Rich comparisons: Python 3 ignores __cmp__, so route them through it.
    #
    def __eq__ (self, other) :
        """equal if `__cmp__` reports no difference"""
        return self.__cmp__ (other) == 0

    def __ne__ (self, other) :
        """inverse of `__eq__`"""
        return self.__cmp__ (other) != 0

    def __lt__ (self, other) :
        """ordering as defined by `__cmp__`"""
        return self.__cmp__ (other) <  0

    def __le__ (self, other) :
        """ordering as defined by `__cmp__`"""
        return self.__cmp__ (other) <= 0

    def __gt__ (self, other) :
        """ordering as defined by `__cmp__`"""
        return self.__cmp__ (other) >  0

    def __ge__ (self, other) :
        """ordering as defined by `__cmp__`"""
        return self.__cmp__ (other) >= 0


    # --------------------------------------------------------------------------
    #
    def __hash__ (self) :
        """
        hash over the captured groups, so that equal ReSults hash equal and
        instances stay usable in sets / as dict keys although `__eq__` is
        defined.
        """
        return hash (tuple (self._glist))



# ------------------------------------------------------------------------------
class ReString (str) :
    r"""
    This is a string class which supports simplified regular expression
    matching. It is not thought that the regex language or expressions are
    simplified, but rather that the invocation of the matching is simple, as is
    the handling of the match results:

        txt = ReString ("The quick brown fox jumps over the lazy dog")

        # the '//' operator is overloaded to match against a regular expression.
        # The result is a `ReSult` instance, which allows simple access to the
        # matches
        with txt // r'(\s.u)(?P<x>.*?j\S+)' as res :
            if res : print ('Matched!')               # boolean check
            print ("res      : '%s' " % res)          # list of results
            print ("res[0]   : '%s' " % res[0])       # index by number ...
            print ("res[1]   : '%s' " % res[1])       # ... for all matches
            print ("res['x'] : '%s' " % res['x'])     # index by match name
            print ("res.x    : '%s' " % res.x)        # ...   as properties
            for i, r in enumerate (res) :
                print ("res %d    : '%s' " % (i, r))  # matches as iterable

            assert (len(res) == 2)                    # number of matches
            assert (res == [' qu', 'ick brown fox jumps'])  # compare to list

        if  txt // r'(rabbit)' :                      # simple use in if / elif / ...
            res = txt.get ()                          # get ReSult of last match

        elif  txt // r'((?:\s).{12,15}?(\S+))' :      # support for full Python regex slang
            res = txt.get ()

        else :
            print ('no match')
    """

    # --------------------------------------------------------------------------
    def __init__ (self, src) :
        """
        Create the string from `src`, with no match result stored yet.
        """

        # str is immutable: the value was already set by str.__new__, and the
        # inherited object.__init__ rejects extra arguments on Python 3.
        str.__init__ (self)

        self._result = None   # ReSult of the most recent '//' operation


    # --------------------------------------------------------------------------
    def __floordiv__ (self, regex) :
        """
        Search this string for `regex` (a pattern string or a compiled
        pattern), remember the outcome and return it as `ReSult`.  An
        unsuccessful search yields an empty (false) `ReSult`.

        Raises a `TypeError` if `regex` is neither a string nor a compiled
        pattern.
        """

        # re.compile() hands back an already compiled pattern unchanged, and
        # does the type check for everything else
        compiled_regex = re.compile (regex)

        self._result = ReSult (compiled_regex.search (self))
        return self._result


    # --------------------------------------------------------------------------
    def get (self, key=None, default=None) :
        """
        Access the result of the most recent '//' operation.

        Without `key`, return the stored `ReSult` (`None` if no match was
        attempted, yet).  With `key` (group index or group name), return that
        group, or `default` if there is no stored result, no such group, or
        the group did not take part in the match.
        """

        # compare against None: index 0 is a valid key, but false
        if  key is None :
            return self._result

        if  self._result is None :
            return default

        value = self._result[key]

        if  value is None :
            return default

        return value


# ------------------------------------------------------------------------------
# 
def _example_re_string () :
    """
    Usage example for `ReString` / `ReSult`: match a sample sentence against
    a few patterns and print the different views on the result.

    `print` is called with a single parenthesized argument, which prints the
    same text on Python 2 and Python 3.  Patterns are raw strings, so that
    the regex backslashes are not read as (invalid) string escapes.
    """

    txt = ReString ("The quick brown fox jumps over the lazy dog")

    with txt // r'(\s.u)(?P<x>.*?j\S+)' as res :

        if  res :
            print ('Matched!')                    # boolean check
        print ("res      : '%s' " % res)          # list of results
        print ("res[0]   : '%s' " % res[0])       # index by number ...
        print ("res[1]   : '%s' " % res[1])       # ... for all matches
        print ("res['x'] : '%s' " % res['x'])     # index by match name
        print ("res.x    : '%s' " % res.x)        # ...   as properties
        for i, r in enumerate (res) :
            print ("res %d    : '%s' " % (i, r))  # matches as iterable

        assert (len(res) == 2)                    # number of matches
        assert (res == [' qu', 'ick brown fox jumps'])  # compare to list


    if  txt // r'(rabbit)' :                      # simple use in if / elif / ...
        res = txt.get ()                          # get ReSult of last match

    elif  txt // r'((?:\s).{12,15}?(\S+))' :      # support for full Python regex slang
        res = txt.get ()

    else :
        print ('no match')


# ------------------------------------------------------------------------------
# 
def _test_re_string () :
    """
    Self-test for `ReString` / `ReSult`: checks the boolean, length,
    comparison, index, name, attribute and iteration views on a known match,
    and the if / elif / else usage with a failing and a succeeding pattern.

    Raises an `AssertionError` on the first failed check.  Patterns are raw
    strings, so that the regex backslashes are not read as (invalid) string
    escapes.
    """

    txt   = ReString ("The quick brown fox jumps over the lazy dog")
    tgt_l = [' qu', 'ick brown fox jumps']
    tgt_d = {'x'  : 'ick brown fox jumps'}

    with txt // r'(\s.u)(?P<x>.*?j\S+)' as res :
        assert (res)
        assert (len(res) == len(tgt_l))
        assert (res      == tgt_l), "%s != %s" % (str(res), str(tgt_l))
        assert (res[0]   == tgt_l[0])
        assert (res[1]   == tgt_l[1])
        assert (res['x'] == tgt_d['x'])
        assert (res.x    == tgt_d['x'])
        for i, r in enumerate (res) :
            assert (r    == tgt_l[i])

    if  txt // r'(rabbit)' :
        assert (False), "'(rabbit)' must not match"

    elif  txt // r'((?:\s).{12,15}?(\S+))' :      # support for full Python regex slang
        pass                                      # expected branch

    else :
        assert (False), "fallback pattern must match"


# ------------------------------------------------------------------------------

# _example_re_string()
# _test_re_string()

# ------------------------------------------------------------------------------