# coding:utf-8
'''
@author: ota
'''
import re

from sqlparse import sql, tokens as T
from enum import Enum


class EngineComment(Enum):
    """ SQL engine comment type """
    none = 0  # not a SQL engine comment
    syntax = 1  # engine logic
    param = 2  # parameter
    sql_identifier = 3  # SQL_IDENTIFIER


def get_comment_type(token, comment_syntax):
    """ Returns the SQL engine comment type """
    if is_block_comment(token):
        return comment_syntax.get_block_comment_type(token)
    elif is_line_comment(token):
        return comment_syntax.get_line_comment_type(token)


def is_param_comment(token, next_token, comment_syntax):
    """ Checks for a SQL engine parameter comment """
    return get_comment_type(token, comment_syntax) == EngineComment.param \
        and (is_literal(next_token) or is_wildcard(next_token) or is_parenthesis(next_token))


def is_hint_block_comment(token):
    """ Checks for an Oracle hint comment """
    if is_block_comment(token):
        tokens = token.tokens
        if len(tokens) >= 3:
            comment = tokens[1].value
            if comment.startswith("+"):
                return True
    return False


def is_block_comment(token):
    """ Checks for a block comment """
    if is_comment(token):
        comment = token.token_next_by_type(0, T.Comment)
        return comment.value in ["/*", "*/"]
    return False


def is_line_comment(token):
    """ Checks for a line comment """
    if is_comment(token):
        comment = token.token_next_by_type(0, T.Comment)
        return comment.value not in ["/*", "*/"]
    return False


def is_plain_line_comment(token, comment_syntax):
    """ Checks for a line comment that is not SQL engine syntax """
    return is_line_comment(token) and get_comment_type(token, comment_syntax) == EngineComment.none


def is_line_description_line_comment(token, comment_syntax):
    """ Checks for a line comment that can serve as a line description """
    return is_plain_line_comment(token, comment_syntax) and token.is_line_description


def is_comment(token):
    """ Checks for a comment """
    return isinstance(token, sql.Comment)


def is_dot(token):
    """ Checks for a dot """
    return is_punctuation(token) and token.value == "."


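# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original module):
# the ``comment_syntax`` argument used by get_comment_type() above is only
# assumed to expose get_block_comment_type(token) / get_line_comment_type(token)
# and to return EngineComment members.  A minimal stand-in could look like:
#
#     class PlainCommentSyntax(object):  # hypothetical helper name
#         def get_block_comment_type(self, token):
#             return EngineComment.none
#
#         def get_line_comment_type(self, token):
#             return EngineComment.none
#
# With such an object every comment is classified as EngineComment.none, so
# is_plain_line_comment() treats every line comment as a plain comment.
# ---------------------------------------------------------------------------

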
def is_comma(token):
    """ Checks for a comma """
    return is_punctuation(token) and token.value == ","


def is_literal(token):
    """ Checks for a literal (string or number) """
    return token.ttype in T.Literal


def is_string_literal(token):
    """ Checks for a string literal """
    return token.ttype in T.Literal.String


def is_number_literal(token):
    """ Checks for a number literal """
    return token.ttype in T.Literal.Number


def is_null_keyword(token):
    """ Checks for the NULL keyword """
    return token.match(T.Keyword, "NULL")


def is_comparison(token):
    """ Checks for a comparison """
    return isinstance(token, sql.Comparison)


def is_identifier_list(token):
    """ Checks for an IdentifierList """
    return isinstance(token, sql.IdentifierList)


def is_identifier(token):
    """ Checks for an Identifier """
    return isinstance(token, sql.Identifier)


def is_function(token):
    """ Checks for a function """
    return isinstance(token, sql.Function)


def is_value_candidate(token):
    """ Checks whether the token can be a value """
    return is_string_candidate(token) or is_number_candidate(token)


def is_string_candidate(token):
    """ Checks whether the token can be a string """
    if is_string_literal(token):
        return True
    if is_function(token):
        return True
    if is_null_keyword(token):
        return True
    if is_calculation(token):
        return True
    if is_parenthesis(token):
        tokens = [t for t in tokens_parenthesis_inner(token) if is_enable(t)]
        if len(tokens) == 1:
            return is_string_candidate(tokens[0])
        elif tokens:
            return is_select_dml(tokens[0])
    if is_identifier(token):
        tokens = [t for t in token.tokens if is_enable(t)]
        for tkn in tokens:
            if (tkn.ttype not in T.Name) and (not is_dot(tkn)):
                return False
        return True
    return False


def is_number_candidate(token):
    """ Checks whether the token can be a number """
    if is_number_literal(token):
        return True
    if is_function(token):
        return True
    if is_null_keyword(token):
        return True
    if is_calculation(token):
        return True
    if is_parenthesis(token):
        tokens = [t for t in tokens_parenthesis_inner(token) if is_enable(t)]
        if len(tokens) == 1:
            return is_number_candidate(tokens[0])
        elif tokens:
            return is_select_dml(tokens[0])
    if is_identifier(token):
        tokens = [t for t in token.tokens if is_enable(t)]
        for tkn in tokens:
            if (tkn.ttype not in T.Name) and (not is_dot(tkn)):
                return False
        return True
    return False


def is_exists_function(token):
    """ Checks for the EXISTS function """
    if not is_function(token):
        return False
    ftoken = token_next_enable(token)
    return equals_ignore_case(ftoken.value, "EXISTS")


def is_over_function(token):
    """ Checks for the OVER function """
    if not is_function(token):
        return False
    ftoken = token_next_enable(token)
    return equals_ignore_case(ftoken.value, "OVER")


def is_parenthesis(token):
    """ Checks for a parenthesis """
    return isinstance(token, sql.Parenthesis)


def is_dmlddl_parenthesis(token):
    """ Checks for a parenthesis that contains DML or DDL """
    if not is_parenthesis(token):
        return False
    open_punc = token.token_next_match(0, T.Punctuation, '(')
    first = token_next_enable(token, open_punc)
    if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
        return True
    if is_with(first):
        return True
    if is_parenthesis(first):
        return is_dmlddl_parenthesis(first)
    return False


def is_enum_parenthesis(token):
    """ Checks whether the parenthesis contains an enumeration of values """
    if not is_parenthesis(token):
        return False

    def is_enums(tokens):
        for token in tokens:
            if token.is_whitespace() \
                    or is_comment(token) \
                    or is_comma(token) \
                    or is_literal(token) \
                    or is_null_keyword(token) \
                    or is_identifier(token):
                pass
            elif is_identifier_list(token):
                if not is_enums(token.tokens):
                    return False
            else:
                return False
        return True
    return is_enums(tokens_parenthesis_inner(token))


def is_comparisons_parenthesis(token):
    """ Checks whether the parenthesis contains comparisons """
    if not is_parenthesis(token):
        return False
    exists_logical_operator = False
    exists_comparison_operator = False
    exists_parenthesis = False
    exists_exists_function = False
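    # Descriptive note (added for clarity): the parenthesised group counts as
    # a comparison as soon as an explicit sql.Comparison is found, or once an
    # AND/OR keyword is seen together with a comparison operator, a nested
    # parenthesis, or an EXISTS call, which is what the flags above track.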
    prev_enable = None
    for tkn in tokens_parenthesis_inner(token):
        if is_comparison(tkn):
            return True
        if is_logical_operator_keyword(tkn):
            exists_logical_operator = True
        if is_comparison_operator(tkn):
            exists_comparison_operator = True
        if prev_enable and get_comparison_operator_words(prev_enable, tkn):
            exists_comparison_operator = True
        if is_parenthesis(tkn):
            exists_parenthesis = True
        if is_exists_function(tkn):
            exists_exists_function = True
        if exists_logical_operator and exists_comparison_operator:
            return True
        if exists_logical_operator and exists_parenthesis:
            return True
        if exists_logical_operator and exists_exists_function:
            return True
        if is_enable(tkn):
            prev_enable = tkn
    return False


def is_punctuation(token):
    """ Checks for punctuation """
    return token.ttype in T.Punctuation


def is_semicolon_punctuation(token):
    """ Checks for a semicolon """
    return is_punctuation(token) and token.value == ";"


def is_open_punctuation(token):
    """ Checks for an opening parenthesis """
    return is_punctuation(token) and token.value == "("


def is_close_punctuation(token):
    """ Checks for a closing parenthesis """
    return is_punctuation(token) and token.value == ")"


def is_keyword(token):
    """ Checks for a keyword """
    return token.is_keyword


def is_as_keyword(token):
    """ Checks for AS """
    return token.match(T.Keyword, "AS")


def is_distinct_keyword(token):
    """ Checks for DISTINCT """
    return token.match(T.Keyword, "DISTINCT")


def is_from_keyword(token):
    """ Checks for FROM """
    return token.match(T.Keyword, "FROM")


def is_by_keyword(token):
    """ Checks for BY """
    return token.match(T.Keyword, "BY")


def is_select_dml(token):
    """ Checks for the SELECT keyword (DML) """
    return token.match(T.DML, "SELECT")


def is_update_dml(token):
    """ Checks for the UPDATE keyword (DML) """
    return token.match(T.DML, "UPDATE")


def is_insert_dml(token):
    """ Checks for the INSERT keyword (DML) """
    return token.match(T.DML, "INSERT")


def is_delete_dml(token):
    """ Checks for the DELETE keyword (DML) """
    return token.match(T.DML, "DELETE")


def is_with(token):
    """ Checks for a WITH clause """
    from uroborosqlfmt.sql import With
    return isinstance(token, With)


def is_into_keyword(token):
    """ Checks for INTO """
    return token.match(T.Keyword, "INTO")


def is_values_keyword(token):
    """ Checks for VALUES """
    return token.match(T.Keyword, "VALUES")


def is_set_keyword(token):
    """ Checks for SET """
    return token.match(T.Keyword, "SET")


def is_dml(token):
    """ Checks for a DML keyword """
    return token.ttype in T.DML


def is_wildcard(token):
    """ Checks for the wildcard '*' """
    return token.ttype in T.Wildcard


def is_where(token):
    """ Checks for a WHERE clause """
    return isinstance(token, sql.Where)


def is_when(token):
    """ Checks for a WHEN clause """
    from uroborosqlfmt.sql import When
    return isinstance(token, When)


def is_having(token):
    """ Checks for a HAVING clause """
    from uroborosqlfmt.sql import Having
    return isinstance(token, Having)


def is_on(token):
    """ Checks for an ON clause """
    from uroborosqlfmt.sql import On
    return isinstance(token, On)


def is_connectby(token):
    """ Checks for a CONNECT BY clause """
    from uroborosqlfmt.sql import ConnectBy
    return isinstance(token, ConnectBy)


def is_startwith(token):
    """ Checks for a START WITH clause """
    from uroborosqlfmt.sql import StartWith
    return isinstance(token, StartWith)


def is_case(token):
    """ Checks for a CASE clause """
    return isinstance(token, sql.Case)


def is_forupdate(token):
    """ Checks for a FOR UPDATE clause """
    from uroborosqlfmt.sql import ForUpdate
    return isinstance(token, ForUpdate)


def is_waitornowait(token):
    """ Checks for a WAIT / NOWAIT clause """
    from uroborosqlfmt.sql import WaitOrNowait
    return isinstance(token, WaitOrNowait)


def is_union(token):
    """ Checks for a UNION clause """
    from uroborosqlfmt.sql import Union
    return isinstance(token, Union)


def is_join(token):
    """ Checks for a JOIN clause """
    from uroborosqlfmt.sql import Join
    return isinstance(token, Join)


def is_mergewhen(token):
    """ Checks for a MERGE WHEN clause """
    from uroborosqlfmt.sql import MergeWhen
    return isinstance(token, MergeWhen)


def is_mergeupdateinsertclause(token):
    """ Checks for a DML clause inside MERGE """
    from uroborosqlfmt.sql import MergeUpdateInsertClause
    return isinstance(token, MergeUpdateInsertClause)


def is_between_keyword(token):
    """ Checks for BETWEEN """
    return token.match(T.Keyword, "BETWEEN")


def is_and_keyword(token):
    """ Checks for the AND operator """
    return token.match(T.Keyword, "AND")


def is_using_keyword(token):
    """ Checks for USING """
    return token.match(T.Keyword, "USING")


def is_logical_operator_keyword(token):
    """ Checks for the AND / OR operators """
    return token.match(T.Keyword, ("AND", "OR"))


def is_name_or_keyword(token):
    """ Checks for a name or keyword """
    return is_keyword(token) or token.ttype in T.Name


def is_operator(token):
    """ Checks for an operator """
    return token.ttype in T.Operator


def is_comparison_operator(token):
    """ Checks for a comparison operator """
    return token.ttype in T.Operator.Comparison


def is_concat_operator(token):
    """ Checks for the string concatenation operator """
    return is_operator(token) and token.value == "||"


def is_phrase(token):
    """ Checks for a Phrase """
    from uroborosqlfmt.sql import Phrase
    return isinstance(token, Phrase)


def is_calculation(token):
    """ Checks for a calculation """
    from uroborosqlfmt.sql import Calculation
    return isinstance(token, Calculation)


def is_calc_operator(token):
    """ Checks for a calculation operator """
    if is_concat_operator(token):
        return True
    if is_operator(token) and not is_comparison_operator(token):
        return True
    return False


def is_enable(token):
    """ Checks for an effective token (anything other than comments and whitespace) """
    if token.is_whitespace():
        return False
    if is_comment(token):
        return False
    if token.parent and is_comment(token.parent):
        return False
    return True


def find_comparison_operator_words(tokens):
    """ Searches for comparison operator words """
    prev = None
    for token in tokens[:]:
        if not is_enable(token):
            continue
        if not prev:
            prev = token
            continue
        comps = get_comparison_operator_words(prev, token)
        if comps:
            return comps
        prev = token
    if prev:
        return get_comparison_operator_words(prev, None)
    else:
        return []


def get_comparison_operator_words(token, next_token):
    """ Gets comparison operator words """
    if next_token and is_keyword(next_token):
        if is_keyword(token):
            if equals_ignore_case(token.value, "NOT"):
                if equals_ignore_case(next_token.value, ["IN", "BETWEEN", "LIKE"]):
                    return [token, next_token]
            elif equals_ignore_case(token.value, "IS"):
                if equals_ignore_case(next_token.value, ["NOT"]):
                    return [token, next_token]
                else:
                    return [token]
        elif is_comparison_operator(token):
            if equals_ignore_case(next_token.value, ["ANY", "SOME", "ALL"]):
                return [token, next_token]
            else:
                return [token]
    else:
        if is_keyword(token):
            if equals_ignore_case(token.value, ["IN", "BETWEEN", "LIKE", "IS"]):
                return [token]
        elif is_comparison_operator(token):
            return [token]
    return []


def tokens_parenthesis_inner(parenthesis):
    """ Gets the list of tokens inside the parenthesis """
    open_punc = parenthesis.token_next_match(0, T.Punctuation, '(')
    close_punc = parenthesis.token_next_match(open_punc, T.Punctuation, ')')
    return parenthesis.tokens_between(open_punc, close_punc)[1:-1]


def token_function_inner_parenthesis(func):
    """ Gets the parenthesis token inside a function token """
    ftoken = token_next_enable(func)
    return token_next_enable(func, ftoken)


def token_next_enable(token, idx=-1):
    """ Gets the next effective token """
    if not isinstance(idx, int):
        idx = token.token_index(idx)
    return token.token_matching(idx + 1, [is_enable])


def token_prev_enable(token, idx=-1):
    """ Gets the previous effective token """
    if not isinstance(idx, int):
        idx = token.token_index(idx)
    if idx < 0:
        idx = len(token.tokens)
    prv = token.token_prev(idx)
    while is_comment(prv):
        prv = token.token_prev(prv)
    return prv


def flatten_tokens_prev(top_token, token):
    """ Generator over the preceding tokens """
    tgt = next(flatten(token))
    iterator = flatten(top_token)
    tokens = []
    for tkn in iterator:
        if tkn == tgt:
            break
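        # collect everything that flattens before the target token; it is
        # yielded in reverse below so callers can walk backwards from the token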
        tokens.append(tkn)
    for tkn in tokens[::-1]:
        yield tkn


def flatten_tokens_next(top_token, token):
    """ Generator over the following tokens """
    tgt = list(flatten(token))[-1]
    iterator = flatten(top_token)
    for tkn in iterator:
        if tkn == tgt:
            break
    for tkn in iterator:
        yield tkn


def token_parents(token):
    """ Generator over parent tokens """
    while token:
        yield token
        token = token.parent


def token_top_matching(token, sub, func):
    """ Walks up the parents and checks whether a matching token exists """
    def in_parents(tkn):
        for parent in token_parents(sub):
            if tkn == parent:
                return True
        return False
    parents = token_parents(token)
    tkn = None
    for parent in parents:
        if func(parent):
            if in_parents(parent):
                return None
            tkn = parent
            break
    for parent in parents:
        if in_parents(parent):
            return tkn
        if not func(parent):
            return tkn
        tkn = parent
    return tkn


def within_with_section(stmt, token):
    """ Checks whether the token is inside a WITH clause """
    for tkn in tokens_tree_up(stmt, token):
        if equals_ignore_case(tkn.value, "WITH"):
            return tkn
        if is_dml(tkn):
            return None
    return None


def within_select_statement(stmt, token):
    """ Checks whether the token is inside a SELECT statement """
    for tkn in tokens_tree_up(stmt, token):
        if is_dml(tkn):
            if equals_ignore_case(tkn.value, "SELECT"):
                return tkn
            return None
    return None


def within_update_statement(stmt, token):
    """ Checks whether the token is inside an UPDATE statement """
    for tkn in tokens_tree_up(stmt, token):
        if is_dml(tkn):
            if equals_ignore_case(tkn.value, "UPDATE"):
                return tkn
            return None
    return None


def within_insert_statement(stmt, token):
    """ Checks whether the token is inside an INSERT statement """
    for tkn in tokens_tree_up(stmt, token):
        if is_dml(tkn):
            if equals_ignore_case(tkn.value, "INSERT"):
                return tkn
            return None
    return None


def within_merge_statement(stmt, token):
    """ Checks whether the token is inside a MERGE statement """
    for tkn in tokens_tree_up(stmt, token):
        if is_dml(tkn):
            if equals_ignore_case(tkn.value, "MERGE"):
                return tkn
            return None
    return None


def within_insert_values_section(stmt, token):
    """ Checks whether the token is inside the VALUES clause of an INSERT """
    itr = tokens_tree_up(stmt, token)
    for tkn in itr:
        if is_parenthesis(tkn):
            break
    for tkn in itr:
        if is_enable(tkn):
            if is_values_keyword(tkn):
                return tkn
            return None
    return None


def within_insert_into_columns_section(stmt, token):
    """ Checks whether the token is inside the column list of an INSERT """
    itr = tokens_tree_up(stmt, token)
    for tkn in itr:
        if is_parenthesis(tkn):
            break
    for tkn in itr:
        if is_enable(tkn):
            if is_identifier(tkn):
                break
            elif is_insert_dml(tkn):
                return tkn
            else:
                return None
    for tkn in itr:
        if is_enable(tkn):
            if is_into_keyword(tkn):
                return tkn
            return None
    return None


def within_update_set_section(stmt, token):
    """ Checks whether the token is inside the SET clause of an UPDATE """
    if not within_update_statement(stmt, token):
        return None
    if within_where_section(stmt, token):
        return None
    itr = tokens_tree_up(stmt, token)
    for tkn in itr:
        if is_set_keyword(tkn):
            return tkn
    return None


def within_where_section(stmt, token):
    """ Checks whether the token is inside a WHERE clause """
    for tkn in tokens_tree_up(stmt, token):
        if equals_ignore_case(tkn.value, "WHERE"):
            return tkn
        if is_dml(tkn):
            return None
    return None


def within_function(stmt, token):
    """ Checks whether the token is inside a function """
    for tkn in get_roots(stmt, token)[:]:
        if is_function(tkn):
            return tkn
    return None


def within_parenthesis(stmt, token):
    """ Checks whether the token is inside a parenthesis """
    for tkn in get_roots(stmt, token)[:]:
        if is_parenthesis(tkn):
            return tkn
    return None


def tokens_tree_up(stmt, token):
    """ Generator that walks backwards up the token tree """
    roots = get_roots(stmt, token)
    cld = roots.pop(0)
    while roots:
        parent = roots.pop(0)
        prevs = []
        for tkn in parent.tokens:
            prevs.append(tkn)
            if tkn == cld:
                cld = parent
                break
        for tkn in prevs[::-1]:
            yield tkn


def get_roots(parent, token):
    """ Gets the list of root tokens (from the token up to the given parent) """
    for tkn in parent.tokens:
        if tkn == token:
            return [token, parent]
        if isinstance(tkn, sql.TokenList):
            ret = get_roots(tkn, token)
            if ret:
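                # the recursive call returns the chain [token, ..., child];
                # appending this parent extends the chain up toward the root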
                ret.append(parent)
                return ret
    return []


def get_parent(top_parent, token):
    """ Gets the parent token, searching down from the given root """
    for tkn in top_parent.tokens:
        tkn.parent = top_parent
        if tkn == token:
            return top_parent
        if isinstance(tkn, sql.TokenList):
            ret = get_parent(tkn, token)
            if ret:
                return ret
    return None


def flatten(token):
    """
    Flattening generator.
    Note: parents are re-assigned while walking; this is what differs from sql.TokenList#flatten.
    """
    if isinstance(token, sql.TokenList):
        for tkn in token.tokens:
            tkn.parent = token
            if isinstance(tkn, sql.TokenList):
                for item in flatten(tkn):
                    yield item
            else:
                yield tkn
    else:
        yield token


CONDITION = 1
VALUE = 2


def get_cases(case):
    """Returns a list of 2-tuples (condition, value).

    If an ELSE exists, the condition is None.
    """
    ret = []
    mode = CONDITION

    for token in case.tokens:
        # Set mode from the current statement
        if token.match(T.Keyword, 'CASE'):
            continue
        elif is_when(token):
            ret.append(([], []))
            mode = CONDITION
        elif token.match(T.Keyword, 'THEN'):
            mode = VALUE
        elif token.match(T.Keyword, 'ELSE'):
            ret.append((None, []))
            mode = VALUE
        elif token.match(T.Keyword, 'END'):
            mode = None

        # First condition without preceding WHEN
        if mode and not ret:
            ret.append(([], []))

        # Append token depending on the current mode
        if mode == CONDITION:
            ret[-1][0].append(token)
        elif mode == VALUE:
            ret[-1][1].append(token)

    # Return cases list
    return ret


def equals_ignore_case(txt1, txt2):
    """ Case-insensitive string comparison """
    if isinstance(txt2, str):
        values = {re.compile(txt2 + "$", re.IGNORECASE)}
    else:
        values = set(re.compile(v + "$", re.IGNORECASE) for v in txt2)
    for pattern in values:
        if pattern.match(txt1):
            return True
    return False


def startswith_ignore_case(target, txt):
    """ Case-insensitive startswith """
    if isinstance(txt, str):
        values = {re.compile(txt, re.IGNORECASE)}
    else:
        values = set(re.compile(v, re.IGNORECASE) for v in txt)
    for pattern in values:
        if pattern.match(target):
            return True
    return False


def endswith_ignore_case(target, txt):
    """ Case-insensitive endswith """
    if isinstance(txt, str):
        values = {re.compile(txt + "$", re.IGNORECASE)}
    else:
        values = set(re.compile(v + "$", re.IGNORECASE) for v in txt)
    for pattern in values:
        if pattern.search(target):
            return True
    return False
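

if __name__ == "__main__":
    # Minimal self-check sketch (an illustrative addition, not part of the
    # original uroborosqlfmt module).  It builds tokens by hand, so it only
    # relies on the sqlparse Token/ttype API that the helpers above already
    # use; it does not depend on how full statements are grouped by the parser.
    _comma = sql.Token(T.Punctuation, ",")
    _null = sql.Token(T.Keyword, "NULL")
    _num = sql.Token(T.Literal.Number.Integer, "42")

    assert is_comma(_comma) and not is_dot(_comma)
    assert is_null_keyword(_null)
    assert is_literal(_num) and is_number_literal(_num)
    assert equals_ignore_case("select", "SELECT")
    assert startswith_ignore_case("ORDER BY", "order")
    assert endswith_ignore_case("GROUP BY", "by")
    print("tokenutils self-check passed")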