Python sqlparse.tokens.Punctuation() Examples

The following are 30 code examples of sqlparse.tokens.Punctuation(), drawn from open source projects; the link above each example points to the original project and source file. You may also want to check out all available functions/classes of the module sqlparse.tokens, or try the search function.
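Before diving in, a minimal sketch of what these tokens look like: sqlparse lexes characters such as commas, periods, parentheses and semicolons to the ttype sqlparse.tokens.Punctuation, which the examples below match against.

import sqlparse
from sqlparse import tokens as T

stmt = sqlparse.parse("SELECT a, b FROM s.t;")[0]
# Collect every punctuation token in the flattened token stream.
puncts = [tok.value for tok in stmt.flatten() if tok.ttype is T.Punctuation]
print(puncts)  # [',', '.', ';']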
Example #1
Source File: parseutils.py    From litecli with BSD 3-Clause "New" or "Revised" License
def extract_tables(sql):
    """Extract the table names from an SQL statment.

    Returns a list of (schema, table, alias) tuples

    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return []

    # INSERT statements must stop looking for tables at the first
    # Punctuation token, e.g.: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
    return list(extract_table_identifiers(stream)) 
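A minimal, self-contained illustration of the stop-at-punctuation idea (extract_from_part and extract_table_identifiers are defined elsewhere in parseutils.py and omitted here):

import sqlparse
from sqlparse import tokens as T

sql_text = "INSERT INTO abc (col1, col2) VALUES (1, 2)"
names = []
for tok in sqlparse.parse(sql_text)[0].flatten():
    if tok.ttype is T.Punctuation:
        break  # stop at the first lparen, as the comment above explains
    if tok.ttype is T.Name:
        names.append(tok.value)
print(names)  # ['abc'] -- col1 and col2 are never reached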
Example #2
Source File: grouping.py    From SublimeText-SQLTools with GNU General Public License v3.0
def group_identifier_list(tlist):
    m_role = T.Keyword, ('null', 'role')
    sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
              sql.IdentifierList, sql.Operation)
    ttypes = (T_NUMERICAL + T_STRING + T_NAME +
              (T.Keyword, T.Comment, T.Wildcard))

    def match(token):
        return token.match(T.Punctuation, ',')

    def valid(token):
        return imt(token, i=sqlcls, m=m_role, t=ttypes)

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.IdentifierList, match,
           valid_prev, valid_next, post, extend=True) 
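For context, this grouping is what makes a comma-separated select list come back as a single sql.IdentifierList:

import sqlparse
from sqlparse import sql

stmt = sqlparse.parse("SELECT a, b, c FROM t")[0]
idlist = next(tok for tok in stmt.tokens if isinstance(tok, sql.IdentifierList))
print([str(i) for i in idlist.get_identifiers()])  # ['a', 'b', 'c']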
Example #3
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _process(self, tlist):
        token = self._get_next_comment(tlist)
        while token:
            tidx = tlist.token_index(token)
            prev = tlist.token_prev(tidx, False)
            next_ = tlist.token_next(tidx, False)
            # Replace with whitespace if prev and next exist and neither is
            # whitespace. This doesn't apply if prev or next is a parenthesis.
            if (prev is not None and next_ is not None
                and not prev.is_whitespace() and not next_.is_whitespace()
                and not (prev.match(T.Punctuation, '(')
                         or next_.match(T.Punctuation, ')'))):
                tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
            else:
                tlist.tokens.pop(tidx)
            token = self._get_next_comment(tlist) 
Example #4
Source File: filters.py    From codenn with MIT License
def _process(self, tlist):
        token = self._get_next_comment(tlist)
        while token:
            tidx = tlist.token_index(token)
            prev = tlist.token_prev(tidx, False)
            next_ = tlist.token_next(tidx, False)
            # Replace with whitespace if prev and next exist and neither is
            # whitespace. This doesn't apply if prev or next is a parenthesis.
            if (prev is not None and next_ is not None
                and not prev.is_whitespace() and not next_.is_whitespace()
                and not (prev.match(T.Punctuation, '(')
                         or next_.match(T.Punctuation, ')'))):
                tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
            else:
                tlist.tokens.pop(tidx)
            token = self._get_next_comment(tlist) 
Example #5
Source File: mysql_handler.py    From schematizer with Apache License 2.0
def _get_create_definition_tokens(self, stmt):
        lparen_token = stmt.token_next_by_type(0, T.Punctuation)
        if not lparen_token or lparen_token.value != '(':
            return  # nothing to yield without a column definition list

        index = stmt.token_index(lparen_token)
        def_tokens = []
        for token in stmt.tokens[index + 1:]:
            if token.value == ')':
                break

            if isinstance(token, sql.ColumnsDefinition):
                yield token
            elif token.match(T.Punctuation, ','):
                yield def_tokens
                def_tokens = []
            elif not token.is_whitespace():
                def_tokens.append(token)

        if def_tokens:
            yield def_tokens 
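A rough, self-contained sketch of the same comma-splitting idea using only stock sqlparse (sql.ColumnsDefinition is schematizer-specific and not modelled here; treating is_whitespace as a property assumes a recent sqlparse release):

import sqlparse
from sqlparse import tokens as T

stmt = sqlparse.parse("CREATE TABLE t (a INT, b TEXT)")[0]
groups, current, depth = [], [], 0
for tok in stmt.flatten():
    if tok.match(T.Punctuation, '('):
        depth += 1
    elif tok.match(T.Punctuation, ')'):
        depth -= 1
    elif depth == 1 and tok.match(T.Punctuation, ','):
        groups.append(current)  # one column definition finished
        current = []
    elif depth >= 1 and not tok.is_whitespace:
        current.append(tok)
if current:
    groups.append(current)
print([[t.value for t in g] for g in groups])  # [['a', 'INT'], ['b', 'TEXT']]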
Example #6
Source File: grouping.py    From SublimeText-SQLTools with GNU General Public License v3.0
def group_period(tlist):
    def match(token):
        return token.match(T.Punctuation, '.')

    def valid_prev(token):
        sqlcls = sql.SquareBrackets, sql.Identifier
        ttypes = T.Name, T.String.Symbol
        return imt(token, i=sqlcls, t=ttypes)

    def valid_next(token):
        # issue261, allow invalid next token
        return True

    def post(tlist, pidx, tidx, nidx):
        # next_ validation is being performed here. issue261
        sqlcls = sql.SquareBrackets, sql.Function
        ttypes = T.Name, T.String.Symbol, T.Wildcard
        next_ = tlist[nidx] if nidx is not None else None
        valid_next = imt(next_, i=sqlcls, t=ttypes)

        return (pidx, nidx) if valid_next else (pidx, tidx)

    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) 
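The effect is easy to see on a dotted name, which ends up grouped into a single sql.Identifier around the period:

import sqlparse
from sqlparse import sql

stmt = sqlparse.parse("SELECT tbl.col FROM tbl")[0]
ident = next(tok for tok in stmt.tokens if isinstance(tok, sql.Identifier))
print(str(ident))  # tbl.col -- Name, '.', Name grouped into one Identifier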
Example #7
Source File: sql_util.py    From editsql with MIT License
def strip_whitespace_front(token_list):
    new_token_list = []
    found_valid = False

    for token in token_list:
        if not (token.is_whitespace or token.ttype ==
                token_types.Punctuation) or found_valid:
            found_valid = True
            new_token_list.append(token)

    return new_token_list

# strip_whitespace
# Strips whitespace from a token list.
#
# Inputs:
#    token_list: the token list.
#
# Outputs:
#    new token list with no whitespace/punctuation surrounding. 
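A usage sketch for strip_whitespace_front above, assuming token_types is the module's alias for sqlparse.tokens:

import sqlparse

tokens = list(sqlparse.parse("  , a b")[0].flatten())
# Leading whitespace and punctuation are dropped; inner whitespace is kept.
print([t.value for t in strip_whitespace_front(tokens)])  # ['a', ' ', 'b']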
Example #8
Source File: sql_util.py    From atis with MIT License
def strip_whitespace_front(token_list):
    new_token_list = []
    found_valid = False

    for token in token_list:
        if not (token.is_whitespace or token.ttype ==
                token_types.Punctuation) or found_valid:
            found_valid = True
            new_token_list.append(token)

    return new_token_list

# strip_whitespace
# Strips whitespace from a token list.
#
# Inputs:
#    token_list: the token list.
#
# Outputs:
#    new token list with no whitespace/punctuation surrounding. 
Example #9
Source File: tables.py    From pgcli with BSD 3-Clause "New" or "Revised" License
def extract_tables(sql):
    """Extract the table names from an SQL statment.

    Returns a list of TableReference namedtuples

    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return ()

    # INSERT statements must stop looking for tables at the first
    # Punctuation token, e.g.: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)

    # Kludge: sqlparse mistakenly identifies insert statements as
    # function calls due to the parenthesized column list, e.g. interprets
    # "insert into foo (bar, baz)" as a function call to foo with arguments
    # (bar, baz). So don't allow any identifiers in insert statements
    # to have is_function=True
    identifiers = extract_table_identifiers(stream, allow_functions=not insert_stmt)
    # In the case 'sche.<cursor>', we get an empty TableReference; remove that
    return tuple(i for i in identifiers if i.name) 
Example #10
Source File: extract_tables.py    From Archery with Apache License 2.0
def extract_tables(sql):
    """Extract the table names from an SQL statment.
    Returns a list of TableReference namedtuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return ()

    # INSERT statements must stop looking for tables at the first
    # Punctuation token, e.g.: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == "insert"
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)

    # Kludge: sqlparse mistakenly identifies insert statements as
    # function calls due to the parenthesized column list, e.g. interprets
    # "insert into foo (bar, baz)" as a function call to foo with arguments
    # (bar, baz). So don't allow any identifiers in insert statements
    # to have is_function=True
    identifiers = extract_table_identifiers(stream, allow_functions=not insert_stmt)
    # In the case 'sche.<cursor>', we get an empty TableReference; remove that
    return tuple(i for i in identifiers if i.name) 
Example #11
Source File: sql_parse.py    From incubator-superset with Apache License 2.0
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if idx is not None:
        _, token = statement.token_next(idx=idx)
        if token:
            if isinstance(token, IdentifierList):
                # In case of "LIMIT <offset>, <limit>", find comma and extract
                # first succeeding non-whitespace token
                idx, _ = token.token_next_by(m=(sqlparse.tokens.Punctuation, ","))
                _, token = token.token_next(idx=idx)
            if token and token.ttype == sqlparse.tokens.Literal.Number.Integer:
                return int(token.value)
    return None 
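A usage sketch for the helper above (it relies on the module's existing imports of Keyword, IdentifierList and sqlparse.tokens):

import sqlparse

stmt = sqlparse.parse("SELECT * FROM t LIMIT 10, 100")[0]
print(_extract_limit_from_query(stmt))  # 100 -- the offset before the comma is skipped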
Example #12
Source File: others.py    From SublimeText-SQLTools with GNU General Public License v3.0
def _process(tlist):
        def get_next_comment():
            # TODO(andi) Comment types should be unified, see related issue38
            return tlist.token_next_by(i=sql.Comment, t=T.Comment)

        tidx, token = get_next_comment()
        while token:
            pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
            nidx, next_ = tlist.token_next(tidx, skip_ws=False)
            # Replace with whitespace if prev and next exist and neither is
            # whitespace. This doesn't apply if prev or next is a parenthesis.
            if (prev_ is None or next_ is None or
                    prev_.is_whitespace or prev_.match(T.Punctuation, '(') or
                    next_.is_whitespace or next_.match(T.Punctuation, ')')):
                tlist.tokens.remove(token)
            else:
                tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')

            tidx, token = get_next_comment() 
Example #13
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def __custom_process_parenthesis_order(self, parenthesis):
        open_punc = parenthesis.token_next_match(0, T.Punctuation, '(')
        close_punc = parenthesis.token_next_match(open_punc, T.Punctuation, ')')

        self.indent += 2
        parenthesis.insert_after(open_punc, self.nl())

        for token in parenthesis.tokens_between(open_punc, close_punc)[1:-1]:
            if isinstance(token, Phrase):
                parenthesis.insert_before(token, self.nl())
                self._process_phrase(token, kwds=False)
                parenthesis.insert_after(token, self.nl_with_indent(1))
            elif isinstance(token, sql.Identifier) and len(token.tokens) == 1 and isinstance(token.tokens[0], Phrase):
                # Identifier whose only child is a Phrase
                child_token = token.tokens[0]
                parenthesis.insert_before(token, self.nl())
                self._process_phrase(child_token, kwds=False)
                parenthesis.insert_after(token, self.nl_with_indent(1))
            elif token.is_group():
                self._process(token)

        self.indent -= 1
        parenthesis.insert_before(close_punc, self.nl())
        self.indent -= 1 
Example #14
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _stripws_default(self, tlist):
        last_was_ws = False
        last_ws_token = None
        last_was_punctuation = False
        for token in tlist.tokens[:]:
            if token.is_whitespace():
                if last_was_ws or last_was_punctuation:  # strip whitespace that follows whitespace or punctuation
                    tlist.tokens.remove(token)
                    continue
                else:
                    token.value = "\t"
            if tu.is_punctuation(token):
                if last_ws_token:
                    tlist.tokens.remove(last_ws_token)  # strip whitespace preceding punctuation
            last_was_ws = token.is_whitespace()
            last_ws_token = token if last_was_ws else None
            last_was_punctuation = tu.is_punctuation(token)

        self.__custom_stripws_tokenlist(tlist) 
Example #15
Source File: tokenutils.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def is_dmlddl_parenthesis(token):
    """
        DMLかDDLの括弧判定
    """
    if not is_parenthesis(token):
        return False


    open_punc = token.token_next_match(0, T.Punctuation, '(')
    first = token_next_enable(token, open_punc)
    if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
        return True

    if is_with(first):
        return True

    if is_parenthesis(first):
        return is_dmlddl_parenthesis(first)

    return False 
Example #16
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def __process_parenthesis_for_join_using(self, tlist):
        open_punc = tlist.token_next_match(0, T.Punctuation, '(')
        tlist.insert_after(open_punc, self.nl_with_indent(1))
        self._process_default(tlist)

        close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
        tlist.insert_before(close_punc, self.nl()) 
Example #17
Source File: parseutils.py    From athenacli with BSD 3-Clause "New" or "Revised" License
def extract_tables(sql):
    """Extract the table names from an SQL statment.
    Returns a list of (schema, table, alias) tuples
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        return []

    # INSERT statements must stop looking for tables at the first
    # Punctuation token, e.g.: INSERT INTO abc (col1, col2) VALUES (1, 2)
    # abc is the table name, but if we don't stop at the first lparen, then
    # we'll identify abc, col1 and col2 as table names.
    insert_stmt = parsed[0].token_first().value.lower() == 'insert'
    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
    return list(extract_table_identifiers(stream)) 
Example #18
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def __process_parenthesis_for_with_query_cols(self, tlist):
        """
            WITHのqueryカラム名
        """
        open_punc = tlist.token_next_match(0, T.Punctuation, '(')
        self.indent += 1
        tlist.insert_after(open_punc, self.nl())
        self._process_default(tlist)

        close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
        tlist.insert_before(close_punc, self.nl())
        self.indent -= 1 
Example #19
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def __process_parenthesis_for_insert(self, tlist):
        open_punc = tlist.token_next_match(0, T.Punctuation, '(')
        tlist.insert_after(open_punc, self.nl())
        self._process_default(tlist)

        close_punc = tlist.token_next_match(open_punc, T.Punctuation, ')')
        tlist.insert_before(close_punc, self.nl()) 
Example #20
Source File: sql.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def get_identifiers(self):
        """Returns the identifiers.

        Whitespaces and punctuations are not included in this generator.
        """
        for x in self.tokens:
            if not x.is_whitespace() and not x.match(T.Punctuation, ','):
                yield x 
Example #21
Source File: sql.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def get_typecast(self):
        """Returns the typecast or ``None`` of this object as a string."""
        marker = self.token_next_match(0, T.Punctuation, '::')
        if marker is None:
            return None
        next_ = self.token_next(self.token_index(marker), False)
        if next_ is None:
            return None
        return str(next_) 
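For example, a PostgreSQL-style cast parses into an Identifier whose typecast this method reports (shown here with the upstream sqlparse API):

import sqlparse
from sqlparse import sql

stmt = sqlparse.parse("SELECT amount::numeric FROM t")[0]
ident = next(tok for tok in stmt.tokens if isinstance(tok, sql.Identifier))
print(ident.get_typecast())  # numeric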
Example #22
Source File: sql.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def get_parent_name(self):
        """Return name of the parent object if any.

        A parent object is identified by the first occurring dot.
        """
        dot = self.token_next_match(0, T.Punctuation, '.')
        if dot is None:
            return None
        prev_ = self.token_prev(self.token_index(dot))
        if prev_ is None:  # something must be very wrong here...
            return None
        return self._remove_quotes(prev_.value) 
Example #23
Source File: sql.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def get_real_name(self):
        """Returns the real name (object name) of this identifier."""
        # a.b
        dot = self.token_next_match(0, T.Punctuation, '.')
        if dot is not None:
            return self._get_first_name(self.token_index(dot))

        return self._get_first_name() 
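Together with get_parent_name() above, the dot-splitting behaviour looks like this in stock sqlparse:

import sqlparse
from sqlparse import sql

stmt = sqlparse.parse("SELECT schema1.tbl FROM schema1.tbl")[0]
ident = next(tok for tok in stmt.tokens if isinstance(tok, sql.Identifier))
print(ident.get_parent_name(), ident.get_real_name())  # schema1 tbl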
Example #24
Source File: grouping.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
                    cls, include_semicolon=False, recurse=False):

    # bugfix: pass the recurse flag through to nested calls
    [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
                     cls, include_semicolon, recurse) for sgroup in tlist.get_sublists()
     if recurse]
    if isinstance(tlist, cls):
        idx = 1
    else:
        idx = 0
    token = tlist.token_next_match(idx, start_ttype, start_value)
    while token:
        tidx = tlist.token_index(token)
        end = _find_matching(tidx, tlist, start_ttype, start_value,
                             end_ttype, end_value)
        if end is None:
            idx = tidx + 1
        else:
            if include_semicolon:
                next_ = tlist.token_next(tlist.token_index(end))
                if next_ and next_.match(T.Punctuation, ';'):
                    end = next_
            group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
            _group_matching(group, start_ttype, start_value,
                            end_ttype, end_value, cls, include_semicolon)
            idx = tlist.token_index(group) + 1
        token = tlist.token_next_match(idx, start_ttype, start_value) 
Example #25
Source File: grouping.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _group_left_right(tlist, ttype, value, cls,
                      check_right=lambda t: True,
                      check_left=lambda t: True,
                      include_semicolon=False):
    [_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
                       include_semicolon) for sgroup in tlist.get_sublists()
     if not isinstance(sgroup, cls)]
    idx = 0
    token = tlist.token_next_match(idx, ttype, value)
    while token:
        right = tlist.token_next(tlist.token_index(token))
        left = tlist.token_prev(tlist.token_index(token))
        if right is None or not check_right(right):
            token = tlist.token_next_match(tlist.token_index(token) + 1,
                                           ttype, value)
        elif left is None or not check_left(left):
            token = tlist.token_next_match(tlist.token_index(token) + 1,
                                           ttype, value)
        else:
            if include_semicolon:
                sright = tlist.token_next_match(tlist.token_index(right),
                                                T.Punctuation, ';')
                if sright is not None:
                    # only overwrite "right" if a semicolon is actually
                    # present.
                    right = sright
            tokens = tlist.tokens_between(left, right)[1:]
            if not isinstance(left, cls):
                new = cls([left])
                new_idx = tlist.token_index(left)
                tlist.tokens.remove(left)
                tlist.tokens.insert(new_idx, new)
                left = new
            left.tokens.extend(tokens)
            for t in tokens:
                tlist.tokens.remove(t)
            token = tlist.token_next_match(tlist.token_index(left) + 1,
                                           ttype, value) 
Example #26
Source File: filter.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def process(self, stack, stream):
        "Process the stream"
        consume_ws = False
        splitlevel = 0
        stmt = None
        stmt_tokens = []

        # Run over all stream tokens
        for ttype, value in stream:
            # Yield token if we finished a statement and there's no whitespaces
            if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
                stmt.tokens = stmt_tokens
                yield stmt

                # Reset filter and prepare to process next statement
                self._reset()
                consume_ws = False
                splitlevel = 0
                stmt = None

            # Create a new statement if we are not currently in one of them
            if stmt is None:
                stmt = Statement()
                stmt_tokens = []

            # Change current split level (increase, decrease or remain equal)
            splitlevel += self._change_splitlevel(ttype, value)

            # Append the token to the current statement
            stmt_tokens.append(Token(ttype, value))

            # Check if we get the end of a statement
            if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
                consume_ws = True

        # Yield pending statement (if any)
        if stmt is not None:
            stmt.tokens = stmt_tokens
            yield stmt 
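This filter is what backs sqlparse's top-level statement splitting, so its behaviour can be seen through the public API:

import sqlparse

print(sqlparse.split("SELECT 1; SELECT 2;"))
# ['SELECT 1;', 'SELECT 2;'] -- each statement ends at a top-level ';'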
Example #27
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _process_parenthesis(self, tlist):
        first = tlist.token_next(0)
        indented = False
        if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
            self.indent += 1
            tlist.tokens.insert(0, self.nl())
            indented = True
        num_offset = self._get_offset(
            tlist.token_next_match(0, T.Punctuation, '('))
        self.offset += num_offset
        self._process_default(tlist, stmts=not indented)
        if indented:
            self.indent -= 1
        self.offset -= num_offset 
Example #28
Source File: filters.py    From uroboroSQL-formatter with BSD 3-Clause "New" or "Revised" License
def _stripws_identifierlist(self, tlist):
        # Removes newlines before commas, see issue140
        last_nl = None
        for token in tlist.tokens[:]:
            if (token.ttype is T.Punctuation
                and token.value == ','
                and last_nl is not None):
                tlist.tokens.remove(last_nl)
            if token.is_whitespace():
                last_nl = token
            else:
                last_nl = None
        return self._stripws_default(tlist) 
Example #29
Source File: sql_parse.py    From incubator-superset with Apache License 2.0
def _get_table(tlist: TokenList) -> Optional[Table]:
        """
        Return the table if valid, i.e., conforms to the [[catalog.]schema.]table
        construct.

        :param tlist: The SQL tokens
        :returns: The table if the name conforms
        """

        # Strip the alias if present.
        idx = len(tlist.tokens)

        if tlist.has_alias():
            ws_idx, _ = tlist.token_next_by(t=Whitespace)

            if ws_idx != -1:
                idx = ws_idx

        tokens = tlist.tokens[:idx]

        if (
            len(tokens) in (1, 3, 5)
            and all(imt(token, t=[Name, String]) for token in tokens[::2])
            and all(imt(token, m=(Punctuation, ".")) for token in tokens[1::2])
        ):
            return Table(*[remove_quotes(token.value) for token in tokens[::-2]])

        return None 
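The 1/3/5-token pattern the final condition checks mirrors how a dotted name flattens into alternating Name and Punctuation tokens:

import sqlparse

ident = sqlparse.parse("catalog1.schema1.tbl")[0].tokens[0]
print([tok.value for tok in ident.flatten()])
# ['catalog1', '.', 'schema1', '.', 'tbl'] -- names at even positions, dots at odd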
Example #30
Source File: sql_metadata.py    From sql-metadata with MIT License
def get_query_limit_and_offset(query: str) -> Optional[Tuple[int, int]]:
    """
    :type query str
    :rtype: (int, int)
    """
    limit = None
    offset = None
    last_keyword = None
    last_token = None

    for token in get_query_tokens(query):
        if token.is_keyword and token.value.upper() in ['LIMIT', 'OFFSET']:
            last_keyword = token.value.upper()
        elif token.ttype is Number.Integer:
            if last_keyword == 'LIMIT':
                # LIMIT <limit>
                limit = int(token.value)
                last_keyword = None
            elif last_keyword == 'OFFSET':
                # OFFSET <offset>
                offset = int(token.value)
                last_keyword = None
            elif last_token and last_token.ttype is Punctuation:
                # LIMIT <offset>,<limit>
                offset = limit
                limit = int(token.value)

        last_token = token

    if limit is None:
        return None

    return limit, offset or 0
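Hypothetical usage of the function above (get_query_tokens is sql_metadata's own tokenizer):

print(get_query_limit_and_offset("SELECT * FROM t LIMIT 50"))      # (50, 0)
print(get_query_limit_and_offset("SELECT * FROM t LIMIT 10, 50"))  # (50, 10)
print(get_query_limit_and_offset("SELECT * FROM t"))               # None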

