import math
from collections import OrderedDict
import itertools

import pandas as pd
import talib
import numpy as np

from DyCommon.DyCommon import *
from ...Common.DyStockCommon import *


class DyStockDataUtility(object):
    """
    Stock data toolbox, mainly for computing technical indicators and
    statistics-related data.

    NOTE(review): all methods are written without ``self`` and without
    ``@staticmethod``; the code itself invokes them through the class
    (e.g. ``DyStockDataUtility._cosVector(...)``), which works in Python 3.
    """

    def getMinBars(df, m=1):
        """
        Aggregate tick data into minute bars (right-closed resampling).

        @df: tick DataFrame with 'price' and 'volume' columns and a
             datetime index
        @m: bar size in minutes
        @return: OHLC + volume DataFrame; bars with no ticks are dropped
        """
        # Build right-closed minute bars; missing bars come out as NaN.
        df = df.resample(str(m) + 'min', closed='right', label='right')[['price', 'volume']].agg(OrderedDict([('price', 'ohlc'), ('volume', 'sum')]))
        df.dropna(inplace=True)  # drop the missing (empty) bars

        return df

    def getMas(df, mas, dropna=True, indicator='close'):
        """
        Moving averages of @indicator over the given periods.

        @mas: list of window sizes, e.g. [5, 10, 20, 30, 60, ...]
        @dropna: drop leading rows where the longest MA is still NaN
        @return: DataFrame with columns 'ma5', 'ma10', ... (empty DataFrame
                 when @df is None)
        """
        if df is None:
            return pd.DataFrame([])

        means = []
        for ma in mas:
            mean = df[indicator].rolling(center=False, window=ma).mean()  # rolling() is the new API at pandas v0.18.0
            mean.name = 'ma%s'%ma
            means.append(mean)

        df = pd.DataFrame(means).T

        return df.dropna() if dropna else df

    def getDealMas(df, mas, dropna=True):
        """
        Volume-weighted average deal price over the given periods
        (rolling sum of amount divided by rolling sum of volume).

        @mas: list of window sizes, e.g. [5, 10, 20, 30, 60, ...]
        @return: DataFrame with columns 'ma5', 'ma10', ...
        """
        if df is None:
            return pd.DataFrame([])

        means = []
        for ma in mas:
            amtSum = df['amt'].rolling(center=False, window=ma).sum()
            volSum = df['volume'].rolling(center=False, window=ma).sum()

            mean = amtSum/volSum
            mean.name = 'ma%s'%ma
            means.append(mean)

        df = pd.DataFrame(means).T

        return df.dropna() if dropna else df

    def getKamas(df, mas, dropna=True):
        """
        Kaufman adaptive moving averages (KAMA) of close over the given
        periods.

        @mas: list of window sizes, e.g. [5, 10, 20, 30, 60, ...]
        @return: DataFrame with columns 'kama5', 'kama10', ...
        """
        if df is None:
            return pd.DataFrame([])

        means = {}
        names = []
        for ma in mas:
            mean = talib.KAMA(df['close'].values, ma)
            name = 'kama%s'%ma

            means[name] = mean
            names.append(name)

        df = pd.DataFrame(means, index=df.index, columns=names)

        return df.dropna() if dropna else df

    def getAtrRatio(df, period=14):
        """
        Average volatility ratio: ATR(period)/MA(period).

        @return: Series named 'volatility' with NaNs dropped
        """
        highs = df['high']
        lows = df['low']
        closes = df['close']

        atr = talib.ATR(highs, lows, closes, timeperiod=period)
        ma = talib.MA(closes, timeperiod=period)

        volatility = atr/ma

        s = pd.Series(volatility, index=df.index, name='volatility').dropna()

        return s

    def getVolatilityEfficiencyRatio(series):
        """
        Volatility efficiency in [-1, 1]; positive = rising, negative =
        falling.

        @return: (efficiency ratio, per-step volatility ratio Series)
        """
        # Net trend over the whole series vs. total absolute movement.
        direction = series[-1] - series[0]

        # Absolute step-to-step movement.
        change = series - series.shift(1)
        volatility = abs(change).sum()

        # Efficiency coefficient.
        efficiencyRatio = direction/volatility

        # Each step's movement as a fraction of the net trend.
        volatilityRatio = abs(change)/abs(direction)

        return efficiencyRatio, volatilityRatio.dropna()

    def getAreaRatio(df):
        """
        Area ratio (%) of the DF: sum of per-bar high-low ranges over the
        full (max-high - min-low) * number-of-bars rectangle.

        @return: percentage, or None when the total area is zero
        """
        high = df['high'].max()
        low = df['low'].min()

        totalArea = (high - low)*df.shape[0]
        priceArea = (df['high'] - df['low']).sum()

        return None if totalArea == 0 else priceArea/totalArea*100

    def getBBands(df, period=10, stdNbr=2):
        """
        Bollinger Bands of close.

        @period: moving-average window
        @stdNbr: number of standard deviations for the upper/lower bands
        @return: DataFrame with columns ['upper', 'middle', 'lower'],
                 or None when 'close' is missing or talib fails
        """
        try:
            close = df['close']
        except Exception as ex:
            return None

        try:
            upper, middle, lower = talib.BBANDS(
                close.values,
                timeperiod=period,
                # number of non-biased standard deviations from the mean
                nbdevup=stdNbr,
                nbdevdn=stdNbr,
                # Moving average type: simple moving average here
                matype=0)
        except Exception as ex:
            return None

        data = dict(upper=upper, middle=middle, lower=lower)
        df = pd.DataFrame(data, index=df.index, columns=['upper', 'middle', 'lower']).dropna()

        return df

    def _cosVector(x, y):
        """
        Cosine of the angle between vectors @x and @y, clamped to [-1, 1]
        to guard against floating-point drift before math.acos.
        """
        result1 = 0.0
        result2 = 0.0
        result3 = 0.0
        for i in range(len(x)):
            result1 += x[i]*y[i]   # sum(X*Y)
            result2 += x[i]**2     # sum(X*X)
            result3 += y[i]**2     # sum(Y*Y)

        cos = result1/((result2*result3)**0.5)

        # Clamp for numerical safety.
        if cos > 1:
            cos = 1
        if cos < -1:
            cos = -1

        return cos

    def _rotateAngle(v1, v2):
        """
        Anticlockwise angle (degrees) from vector @v1 to @v2 in 2D;
        negative when @v2 is clockwise from @v1.

        @v1, @v2: like (x1, y1) or [x1, y1]
        @return: signed degrees, or None when acos fails
        """
        cos = DyStockDataUtility._cosVector(v1, v2)
        try:
            angle = math.acos(cos)
        except:
            return None

        _, y1 = v1
        _, y2 = v2

        # Cross-product sign surrogate: both vectors here have x == 1, so
        # the z-component of v1 x v2 reduces to y2 - y1.
        cross = y2 - y1

        # vector v2 is clockwise from vector v1 with respect to the origin
        if cross < 0:
            angle = -angle

        degree = angle * 180.0 / math.pi

        return degree

    def xAngles(seriesY, orgY=None, newMaxY=None):
        """
        Angle between each consecutive pair of Y values and the X axis;
        the index of @seriesY is the X coordinate.

        @seriesY: pandas Series of non-negative values
        @orgY: Y-axis origin used for percentage normalization;
               None means no normalization
        @newMaxY: new Y-axis maximum; normalized values are rescaled
                  proportionally onto it
        @return: Series of angles named 'xAngle', or None for bad input

        NOTE(review): when @orgY is None the first comparison
        ``orgY < 0`` raises TypeError on Python 3 — looks like it should
        be ``orgY is not None and orgY < 0``; confirm intended behavior.
        """
        if orgY < 0 or (seriesY < 0).sum() > 0 or seriesY.shape[0] < 2:
            return None

        if orgY is not None:
            seriesY /= orgY
            seriesY = (seriesY - 1)*100

        if newMaxY is not None:
            seriesY = (seriesY/seriesY.max()) * newMaxY

        # Y-axis increments: Y components of the step vectors.
        seriesY = seriesY - seriesY.shift(1)
        seriesY = seriesY.dropna()

        data = {}
        for index in seriesY.index:
            angle = DyStockDataUtility._rotateAngle((1, 0), (1, seriesY[index]))
            data[index] = angle

        s = pd.Series(data)
        s.name = 'xAngle'

        return s

    def xAngle(y1, y2, orgY=None, scale=1):
        """
        Normalize on @y1 and compute the angle between the vector
        [(0, @y1), (1, @y2)] and the X axis; X advances by one unit.

        Typically the X axis is equal time periods and Y is price, so X is
        omitted here.

        @orgY: Y-axis origin for percentage normalization; None = none
        @scale: scale factor applied to the relative change, i.e. after
                transformation @y1 = 0, @y2 = ((@y2-@y1)*100/@y1)*@scale
        """
        assert( y1 > 0 and y2 > 0)

        # Normalize.
        if orgY is not None:
            y1 = (y1 - orgY)/orgY*100
            y2 = (y2 - orgY)/orgY*100

        # Scale.
        y1 *= scale
        y2 *= scale

        # Increment.
        y2 -= y1

        angle = DyStockDataUtility._rotateAngle((1, 0), (1, y2))

        return angle

    def getJaccardIndex(index, startDate, endDate, param, daysEngine, info, codes=None, load=True):
        """
        Jaccard-index statistics for an index's constituent stocks.

        @param: {N days: increase percent within N days}
        @return: original DF, Jaccard-index DF, code-set DF (code
                 intersections), {code: increase DF}, code table
        """
        def codeSet(data):
            # Split the accumulated comma-joined codes into a set.
            data = set(data.split(','))
            try:
                data.remove('')
            except Exception as ex:
                pass

            return data

        # load
        days = sorted(param)
        if load:
            if not daysEngine.load([-days[-1], startDate, endDate], codes=codes):
                return None, None, None, None, None

        info.print('开始杰卡德指数[{0}]统计...'.format(daysEngine.stockIndexes[index]), DyLogData.ind)

        startDay = daysEngine.tDaysOffset(startDate)

        progress = DyProgress(info)

        codes = daysEngine.getIndexStockCodes(index)
        progress.init(len(codes), 100, 5)

        # Get boolean increase and scalar increase for each code.
        # No need to dropna; it's controlled by the algorithm.
        scalarIncreaseDfs = {}
        boolIncreaseDfs = {}
        for code, name in codes.items():
            df = daysEngine.getDataFrame(code)
            if df is None:
                progress.update()
                continue

            scalarIncreaseList = []
            boolIncreaseList = []
            close = df['close']
            for day in days:
                shift = close.shift(day)

                # scalar increase
                increase = (close - shift)*100/shift  # no need to dropna
                increase.name = str(day) + ',' + str(param[day])
                scalarIncreaseList.append(increase)

                # bool increase
                increase = increase >= param[day]
                boolIncreaseList.append(increase)

            # bool increase: True -> "code,", False -> "" so that string
            # addition across codes accumulates a comma-joined code list.
            df = pd.concat(boolIncreaseList, axis=1)
            df.replace([True, False], [code + ',', ''], inplace=True)
            boolIncreaseDfs[code] = df

            # scalar increase
            df = pd.concat(scalarIncreaseList, axis=1)
            df.replace([True, False], [code + ',', ''], inplace=True)
            scalarIncreaseDfs[code] = df

            progress.update()

        # Combine into code sets by string concatenation across codes.
        progress.init(len(boolIncreaseDfs), 100, 5)

        newDf = None
        for code, df in boolIncreaseDfs.items():
            if newDf is None:
                newDf = df
            else:
                newDf = newDf.add(df, fill_value='')

            progress.update()

        if newDf is None:
            return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), scalarIncreaseDfs, daysEngine.stockCodes

        # Complete missing trading days; the index itself never suspends.
        df = daysEngine.getDataFrame(index)
        df = df['close'].copy()
        # NOTE(review): the next line rebinds df to the string '' instead of
        # blanking the Series in place — looks like it should be
        # ``df[:] = ''`` so the add() below aligns on all trading days;
        # confirm against upstream history.
        df = ''
        newDf = newDf.add(df, fill_value='')

        # Turn every element of the DF into a set of codes.
        newDf = newDf.applymap(codeSet)

        # Calculate Jaccard index from the raw data.
        columns = list(newDf.columns)
        newIndex = newDf.index.map(lambda x: x.strftime('%Y-%m-%d')).tolist()
        data = newDf.values.tolist()
        startDayPos = newDf.index.get_loc(startDay)

        newData = {}         # Jaccard indexes
        newCodeSetData = {}  # code intersections
        for pos, (index_, data_) in enumerate(zip(newIndex[startDayPos:], data[startDayPos:]), startDayPos):
            # Calculate Jaccard index for each column combination.
            jaccardIndexes = {}
            jaccardIndexesCodeSet = {}
            combinations = list(itertools.combinations(range(len(columns)), 2))
            for a, b in combinations:
                # get A set
                assert(days[a] < days[b])

                aSetPos = pos - (days[b] - days[a])  # date sequence like 0, A Days, B Days
                aSet = data[aSetPos][a]

                # B set is always data of current date
                intersection = aSet & data_[b]
                try:
                    #jaccardIndex = len(intersection)/len(aSet | data_[b])
                    jaccardIndex = len(intersection)/len(aSet)  # !!!only A set, not union of A set and B set, different with Jaccard Index defination
                except Exception as ex:
                    jaccardIndex = 0  # !!!Null Sets, different with Jaccard Index defination

                key = 'J({0};{1})'.format(columns[a], columns[b])
                jaccardIndexes[key] = jaccardIndex
                jaccardIndexesCodeSet[key] = intersection

            # finish Jaccard index for one day
            newData[index_] = jaccardIndexes
            newCodeSetData[index_] = jaccardIndexesCodeSet

        info.print('杰卡德指数[{0}]统计完成'.format(daysEngine.stockIndexes[index]), DyLogData.ind)

        return newDf, pd.DataFrame(newData).T, pd.DataFrame(newCodeSetData).T, scalarIncreaseDfs, daysEngine.stockCodes

    def edExtremaPIPs(df, w=4, peakIndicator='high', bottomIndicator='low'):
        """
        Euclidean-distance extreme PIPs using the rolling window size as
        the PIP percentage; the window here mirrors @rwExtremas.

        @return: (extremas, peaks, bottoms) Series
        """
        windowSize = 2*w + 1
        pipsPct = windowSize*100/df.shape[0]

        peaks = DyStockDataUtility.edPIPs(df, pipsPct, peakIndicator, True)
        bottoms = DyStockDataUtility.edPIPs(df, pipsPct, bottomIndicator, True)

        extremas = pd.concat([peaks, bottoms]).sort_index()

        return extremas, peaks, bottoms

    def edPIPs(df, pipsPct, indicator='close', excludeHeadTail=False):
        """
        Euclidean-distance PIPs (perceptually important points) of the
        specified column.

        @pipsPct: percentage of points to select as PIPs, excluding the
                  head and tail which are PIPs by construction
        @excludeHeadTail: drop the implicit head/tail PIPs from the result
        @return: Series of PIPs
        """
        assert(df.shape[0] > 2)

        # Raw values.
        series = df[indicator]
        x = np.arange(series.shape[0])
        y = series.values
        size = x.shape[0]

        nbrPIPs = round(size*pipsPct/100)

        # Adjacent-PIP matrices; already-selected PIPs are marked NaN.
        # row 0: left adjacent PIP, row 1: right adjacent PIP.
        xAdjacents = np.array([[x[0]]*size, [x[-1]]*size], dtype=np.float64)
        xAdjacents[0][0] = np.nan; xAdjacents[0][-1] = np.nan
        xAdjacents[1][0] = np.nan; xAdjacents[1][-1] = np.nan

        yAdjacents = np.array([[y[0]]*size, [y[-1]]*size])
        yAdjacents[0][0] = np.nan; yAdjacents[0][-1] = np.nan
        yAdjacents[1][0] = np.nan; yAdjacents[1][-1] = np.nan

        # Indexes of selected PIPs.
        indexPIPs = [0, size-1]

        # Select PIPs one at a time.
        for _ in range(nbrPIPs):
            # Euclidean distance of each candidate to its two adjacent PIPs.
            ed = np.sqrt((x - xAdjacents[0])**2 + (y - yAdjacents[0])**2) + np.sqrt((x - xAdjacents[1])**2 + (y - yAdjacents[1])**2)

            # Index of the farthest candidate.
            try:
                edMax = np.nanargmax(ed)
            except ValueError as ex:  # All-NaN slice encountered
                break

            # Mark the new PIP.
            xAdjacents[0][edMax] = np.nan; xAdjacents[1][edMax] = np.nan
            yAdjacents[0][edMax] = np.nan; yAdjacents[1][edMax] = np.nan

            indexPIPs.append(edMax)
            indexPIPs.sort()
            edMaxIndex = indexPIPs.index(edMax)

            # Rebuild the adjacent-PIP matrices around the new PIP:
            # broadcast it as the left neighbor to its right, and as the
            # right neighbor to its left.
            xAdjacents[0][edMax + 1:indexPIPs[edMaxIndex+1]] = edMax
            xAdjacents[1][indexPIPs[edMaxIndex-1] + 1:edMax] = edMax
            yAdjacents[0][edMax + 1:indexPIPs[edMaxIndex+1]] = y[edMax]
            yAdjacents[1][indexPIPs[edMaxIndex-1] + 1:edMax] = y[edMax]

        # Drop head and tail if requested.
        if excludeHeadTail:
            indexPIPs = indexPIPs[1:-1]

        return series[indexPIPs]

    def rwExtremas(df, w=4, peakIndicator='high', bottomIndicator='low'):
        """
        Rolling-window extremas: peak or bottom in a window of size 2w+1
        centered on each observation.

        @return: (extremas, peaks, bottoms), each a Series
        """
        def _extrema(rwArray):
            # Peak first ...
            extrema = _peak(rwArray)

            # ... then bottom.
            if np.isnan(extrema):
                extrema = _bottom(rwArray)

            return extrema

        def _peak(rwArray):
            center = rwArray[w]

            # Center >= everything on both sides -> peak.
            if center >= rwArray[:w].max() and center >= rwArray[w+1:].max():
                return True

            return np.nan

        def _bottom(rwArray):
            center = rwArray[w]

            # Center <= everything on both sides -> bottom.
            if center <= rwArray[:w].min() and center <= rwArray[w+1:].min():
                return False

            return np.nan

        if peakIndicator == bottomIndicator:
            series = df[peakIndicator]
            extremas = series.rolling(2*w + 1, center=True).apply(_extrema)
            peaks = series[extremas==True]
            bottoms = series[extremas==False]
            extremas = series[extremas.notnull()]
        else:
            peakSeries = df[peakIndicator]
            bottomSeries = df[bottomIndicator]
            peaks = peakSeries.rolling(2*w + 1, center=True).apply(_peak)
            bottoms = bottomSeries.rolling(2*w + 1, center=True).apply(_bottom)
            peaks = peakSeries[peaks.notnull()]
            bottoms = bottomSeries[bottoms.notnull()]
            extremas = pd.concat([peaks, bottoms])
            extremas.sort_index(inplace=True)

        # rename
        extremas.name = 'extrema'
        peaks.name = 'peak'
        bottoms.name = 'bottom'

        return extremas, peaks, bottoms

    def swings(df, w=4):
        """
        Swing values of the DF.

        @w: rolling-window half-size
        @return: (extremas, peaks, bottoms), each a Series
        """
        # Rolling-window extremas.
        extremas, peaks, bottoms = DyStockDataUtility.rwExtremas(df, w=w)

        # Drop consecutive duplicate extrema values.
        extremas = extremas.loc[extremas.shift(1) != extremas]

        # Valid swing points; note peaks/bottoms are rebound here as lists
        # of positional indexes into @extremaList.
        peaks, bottoms = [], []
        extremaList = extremas.values.tolist()
        for i in range(1, len(extremaList) - 1):
            if extremaList[i] >= extremaList[i-1] and extremaList[i] >= extremaList[i+1]:
                peaks.append(i)
            elif extremaList[i] <= extremaList[i-1] and extremaList[i] <= extremaList[i+1]:
                bottoms.append(i)

        # Handle both ends.
        if peaks and bottoms:
            # head
            if peaks[0] < bottoms[0]:
                bottoms.insert(0, 0)
            else:
                peaks.insert(0, 0)

            # tail
            if peaks[-1] < bottoms[-1]:
                peaks.append(len(extremaList) - 1)
            else:
                bottoms.append(len(extremaList) - 1)
        elif peaks:
            bottoms.insert(0, 0)
            bottoms.append(len(extremaList) - 1)
        elif bottoms:
            peaks.insert(0, 0)
            peaks.append(len(extremaList) - 1)
        else:
            return pd.Series(), pd.Series(), pd.Series()

        extremaList = sorted(peaks + bottoms)

        return extremas[extremaList], extremas[peaks], extremas[bottoms]

    def dealsHSARs(df, volatility=2, hsarsVolatility=5):
        """
        Support/resistance prices from tick deals; each price is the
        volume-weighted mean of the highest-volume price buckets.

        @df: ticks DataFrame
        @volatility: price volatility (%) used to bucket prices
        @hsarsVolatility: one support/resistance bucket per this many
                          volatility steps
        @return: list of up to 6 prices, sorted by bucket volume
        """
        def _group(x):
            # The maximum price falls into bucket granularity-1.
            return min(int((x - priceMin)/priceStep), granularity-1)

        def _calc(df):
            df = df.reset_index()
            volumeSum = df['volume'].sum()

            # Volume-weighted mean price of the bucket.
            price = (df['price']*df['volume']).sum()/volumeSum

            return pd.DataFrame({'price': price, 'volume': volumeSum}, index=[0])

        prices = df['price']

        # Price range and bucketing.
        priceMin = prices.min()
        priceMax = prices.max()
        priceDiff = priceMax - priceMin
        pricePct = priceDiff*100/priceMin
        granularity = math.ceil(pricePct/volatility)
        priceStep = priceDiff/granularity

        hsarsNbr = int(granularity/hsarsVolatility)
        hsarsNbr = min(6, hsarsNbr)  # at most 6

        # Volume-weighted mean price and total volume per bucket.
        priceDf = df.set_index('price')
        prices = priceDf.groupby(_group).apply(_calc)

        # Sort by volume, descending.
        prices = prices.sort_values('volume', ascending=False)

        return prices['price'][:hsarsNbr].values.tolist()

    def _rwExtremaHs(prices, volatility, mean=True):
        """
        Horizontal lines from rolling-window extremas.

        @prices: Series of peaks or bottoms (name 'peak' or 'bottom')
        @mean: bucket price is the mean extrema; otherwise min (for
               bottoms) or max (for peaks)
        @return: [[price, count]]
        """
        def _group(x):
            # The maximum price falls into bucket granularity-1.
            return min(int((x - priceMin)/priceStep), granularity-1)

        def _calc(df):
            if df.index.size > 0:
                count = df.index.size
                if mean:
                    # Mean extrema price of the bucket.
                    sumExtremas = sum(df.index.values.tolist())
                    price = sumExtremas/count
                else:
                    if prices.name == 'bottom':
                        price = df.index.values.min()
                    else:
                        price = df.index.values.max()
            else:
                price = np.nan
                count = np.nan

            return pd.DataFrame({'price': price, 'count': count}, index=[0])

        # Price range and bucketing.
        priceMin = prices.min()
        priceMax = prices.max()
        priceDiff = priceMax - priceMin
        pricePct = priceDiff*100/priceMin
        granularity = math.ceil(pricePct/volatility)
        priceStep = priceDiff/granularity

        # Mean extrema price per bucket.
        priceDf = prices.reset_index().set_index(prices.name)
        prices = priceDf.groupby(_group).apply(_calc)
        prices.dropna(inplace=True)

        # resort columns
        prices = prices.reindex(columns=['price', 'count'])

        return prices.values.tolist()

    def rwExtremaHSARs(df, w=4, volatility=5):
        """
        Support/resistance prices from rolling-window extremas; each price
        is the mean of the extremas in its bucket.

        @df: DF, e.g. ticksDF, daysDF, minsDF
        @volatility: price volatility (%) used to bucket prices
        @return: HSARs like [[price, count]]
        """
        def _group(x):
            # The maximum price falls into bucket granularity-1.
            try:
                return min(int((x - priceMin)/priceStep), granularity-1)
            except Exception:
                return granularity-1

        def _calc(df):
            if df.index.size > 0:
                sumExtremas = sum(df.index.values.tolist())
                count = df.index.size

                # Mean extrema price of the bucket.
                price = sumExtremas/count
            else:
                price = np.nan
                count = np.nan

            return pd.DataFrame({'price': price, 'count': count}, index=[0])

        extremas, peaks, bottoms = DyStockDataUtility.rwExtremas(df, w)

        prices = extremas

        # Price range and bucketing.
        priceMin = prices.min()
        priceMax = prices.max()
        priceDiff = priceMax - priceMin
        pricePct = priceDiff*100/priceMin
        try:
            granularity = math.ceil(pricePct/volatility)
        except Exception:
            granularity = 2
        priceStep = priceDiff/granularity

        # Mean extrema price per bucket.
        priceDf = extremas.reset_index().set_index('extrema')
        prices = priceDf.groupby(_group).apply(_calc)
        prices.dropna(inplace=True)

        # resort columns
        prices = prices.reindex(columns=['price', 'count'])

        return prices.values.tolist()

    def rwPeakBottomHSARs(df, w=4, volatility=5, mean=True):
        """
        Support and resistance prices from rolling-window minima and
        maxima.

        @df: DF, e.g. ticksDF, daysDF, minsDF
        @volatility: price volatility (%) used to bucket prices
        @mean: take the mean of bucket extremas, or the max (peaks) /
               min (bottoms)
        @return: (HSs, HRs), each like [[price, count]]
        """
        extremas, peaks, bottoms = DyStockDataUtility.rwExtremas(df, w)

        hss = DyStockDataUtility._rwExtremaHs(bottoms, volatility, mean)
        hrs = DyStockDataUtility._rwExtremaHs(peaks, volatility, mean)

        return hss, hrs

    def trendLine(df):
        """
        Trend line of the DF; the criterion follows "Trader Vic — Methods
        of a Wall Street Master".

        If the period's highest high and lowest low are adjacent, there is
        no trend line and (None, None) is returned.

        @return: (series, bool). series - 2 points as a Series, each point
                 like (time, high/low). bool - True for uptrend, False for
                 downtrend.
        """
        highs = df['high'].values
        lows = df['low'].values

        highestX = np.nanargmax(highs); highestY = highs[highestX]
        lowestX = np.nanargmin(lows); lowestY = lows[lowestX]

        if highestX < lowestX:  # downtrend
            # Downtrend-line side test:
            # (x2-x1)(y-y1) - (y2-y1)(x-x1) > 0 -> right side
            # (x2-x1)(y-y1) - (y2-y1)(x-x1) < 0 -> left side
            highsY = highs[highestX + 1:lowestX]
            highsX = np.array(range(highestX + 1, lowestX))

            # Scan from the lowest point towards the highest for a high
            # that forms a line with the highest point leaving no high
            # above it.
            for i in range(lowestX - 1, highestX, -1):  # loop X axis
                result = (i - highestX)*(highsY - highestY) - (highs[i] - highestY)*(highsX - highestX)
                if (result > 0).sum() == 0:
                    return df['high'][[highestX, i]], False
        else:  # uptrend
            # Uptrend-line side test:
            # (x2-x1)(y-y1) - (y2-y1)(x-x1) < 0 -> right side
            # (x2-x1)(y-y1) - (y2-y1)(x-x1) > 0 -> left side
            lowsY = lows[lowestX + 1:highestX]
            lowsX = np.array(range(lowestX + 1, highestX))

            # Scan from the highest point towards the lowest for a low
            # that forms a line with the lowest point leaving no low
            # below it.
            for i in range(highestX - 1, lowestX, -1):  # loop X axis
                result = (i - lowestX)*(lowsY - lowestY) - (lows[i] - lowestY)*(lowsX - lowestX)
                if (result < 0).sum() == 0:
                    return df['low'][[lowestX, i]], True

        return None, None

    def getAtrExtreme(df, atrPeriod=14, emaPeriod=30, stdPeriod=30, atrExtremeFastPeriod=3, dropna=True):
        """
        TTI ATR Extreme channel, based on "Volatility-Based Technical
        Analysis" (TTI = 'Trading The Invisible').

        @atrPeriod: ATR averaging period
        @emaPeriod: exponential-moving-average period
        @stdPeriod: standard-deviation period of the ATR extremes
        @atrExtremeFastPeriod: fast SMA period of the ATR extremes
        @return: DataFrame with columns 'ema', 'std', 'ma'
        """
        highs = df['high'].values
        lows = df['low'].values
        closes = df['close'].values

        # Per the talib source, its ATR is not an N-day simple average but
        # an EMA-like exponential average.
        atr = talib.ATR(highs, lows, closes, timeperiod=atrPeriod)

        emaDf = df.ewm(span=emaPeriod).mean()

        atrExtremes = np.where(closes > emaDf['close'].values,
                               ((highs - emaDf['high'].values)/closes * 100) * (atr/closes * 100),
                               ((lows - emaDf['low'].values)/closes * 100) * (atr/closes * 100)
                               )

        atrExtremeSeries = pd.Series(atrExtremes)
        emaAtrExtremes = atrExtremeSeries.ewm(span=emaPeriod).mean().values
        stdAtrExtremes = atrExtremeSeries.rolling(center=False, window=stdPeriod).std().values
        maAtrExtremes = atrExtremeSeries.rolling(center=False, window=atrExtremeFastPeriod).mean().values

        atrExtremeDf = pd.DataFrame(data={'ema': emaAtrExtremes, 'std': stdAtrExtremes, 'ma': maAtrExtremes},
                                    index=df.index
                                    )

        return atrExtremeDf.dropna() if dropna else atrExtremeDf

    def countConsecutiveLine(df, bar='day', greenLine=True):
        """
        Count runs of consecutive green (bearish) or red (bullish,
        including doji) bars, at least 2 long. Due to the algorithm, the
        very first bar of a run may be ignored.

        @df: OHLC bar DF of a stock (fund) or index
        @bar: 'day' for daily bars, 'min' for minute bars
        @greenLine: True - green (bearish) bars, False - red (bullish)
        @return: counted DF

        NOTE(review): uses DataFrame.ix, which was removed in pandas 1.0 —
        needs .iloc/.loc migration before upgrading pandas.
        """
        def _count(df):
            # Ignore single-bar runs; need at least two consecutive bars.
            if df.shape[0] == 1:
                return None

            if greenLine:
                # Is this a run of green (bearish) bars?
                if df.ix[-1, 'close'] >= df.ix[-1, 'open']:
                    return None
            else:
                # Is this a run of red (bullish) bars?
                if df.ix[-1, 'close'] <= df.ix[-1, 'open']:
                    return None

            # Does the run include the very first bar of the data
            # (no previous close available)?
            if np.isnan(df.ix[0, 'preClose']):
                # Includes the first bar.
                if df.shape[0] == 2:
                    return None

                preClose = df.ix[1, 'preClose']
                consecutiveLineNbr = df.shape[0] - 1
            else:
                preClose = df.ix[0, 'preClose']
                consecutiveLineNbr = df.shape[0]

            data = {'开始时间': [df.index[0].strftime("%Y-%m-%d") if bar == 'day' else df.index[0].strftime("%Y-%m-%d %H:%M:%S")],
                    '结束时间': [df.index[-1].strftime("%Y-%m-%d") if bar == 'day' else df.index[-1].strftime("%Y-%m-%d %H:%M:%S")],
                    '连续阴线数' if greenLine else '连续阳线数': [consecutiveLineNbr],
                    '跌幅(%)' if greenLine else '涨幅(%)': [(df.ix[-1, 'close'] - preClose)*100/preClose],
                    '上缺口数': df['upGap'].sum(),
                    '下缺口数': df['downGap'].sum(),
                    '上缺口幅度(%)': df[df['upGap']]['upGapInc'].sum() if df['upGap'].sum() > 0 else None,
                    '下缺口幅度(%)': df[df['downGap']]['downGapInc'].sum() if df['downGap'].sum() > 0 else None
                    }

            return pd.DataFrame(data=data, columns=columns)

        columns = ['开始时间', '结束时间',
                   '连续阴线数' if greenLine else '连续阳线数',
                   '跌幅(%)' if greenLine else '涨幅(%)',
                   '上缺口数', '下缺口数',
                   '上缺口幅度(%)', '下缺口幅度(%)']

        # Concat previous close for calculating total drop/rise.
        preCloses = df['close'].shift(1)
        preCloses.name = 'preClose'

        # Upward gaps.
        upGaps = df['low'] > df['high'].shift(1)
        upGaps.name = 'upGap'
        upGapsIncrease = (df['low'] - df['high'].shift(1))/preCloses*100
        upGapsIncrease = upGapsIncrease[upGaps]
        upGapsIncrease.name = 'upGapInc'

        # Downward gaps.
        downGaps = df['high'] < df['low'].shift(1)
        downGaps.name = 'downGap'
        downGapsIncrease = (df['high'] - df['low'].shift(1))/preCloses*100
        downGapsIncrease = downGapsIncrease[downGaps]
        downGapsIncrease.name = 'downGapInc'

        df = pd.concat([df, preCloses, upGaps, downGaps, upGapsIncrease, downGapsIncrease], axis=1)

        lineSeries = df['close'] <= df['open'] if greenLine else df['close'] >= df['open']
        lineDf = df.groupby((lineSeries != lineSeries.shift(1)).cumsum()).apply(_count)

        # !!!Must convert to datetime first so it can be used as an index.
        lineDf = lineDf.set_index(pd.to_datetime(lineDf['结束时间']))

        # Add the after-N-days increase columns.
        afterIncreases = []
        for day in [1, 2, 3, 4, 5, 10, 20, 30, 60]:
            afterCloses = df['close'].shift(-day)
            afterIncrease = (afterCloses[lineDf.index] - df['close'][lineDf.index])*100/df['close'][lineDf.index]
            afterIncrease.name = '{0}{1}涨幅(%)'.format(day, '日' if bar == 'day' else '分')
            afterIncreases.append(afterIncrease)

        # Concat the after-N-days increase columns.
        lineDf = pd.concat([lineDf] + afterIncreases, axis=1)
        lineDf = lineDf.reindex(columns=columns + [s.name for s in afterIncreases])  # sort by columns

        return lineDf

    def getIntraDayBars(df, bar='1min'):
        """
        Build intraday bars from a tick DF.

        @bar: pandas resample rule, e.g. '1min'
        @return: OHLC + volume DataFrame; empty bars are dropped
        """
        # Build right-closed bars; missing bars come out as NaN.
        barDf = df.resample(bar, closed='right', label='right')[['price', 'volume']].agg(OrderedDict([('price', 'ohlc'), ('volume', 'sum')]))
        barDf.dropna(inplace=True)  # drop the missing (empty) bars

        # Remove the multi-index of columns.
        barDf = pd.concat([barDf['price'], barDf['volume']], axis=1)

        barDf = barDf[barDf['volume'] > 0]  # drop bars with no trades

        return barDf

    def countLimitUp(dfs, info):
        """
        Daily limit-up statistics.

        @dfs: {code: DF}
        @return: DF - not-held limit-up count, held limit-up count, held
                 ratio (%), held count over all stocks (%)

        NOTE(review): ``-boolCloseChange`` relies on unary minus of a bool
        Series (deprecated in pandas; use ``~``) — confirm before a pandas
        upgrade.
        """
        info.print('开始统计封板数据...', DyLogData.ind)

        progress = DyProgress(info)
        progress.init(len(dfs), 100, 10)

        limitUpStats, nokLimitUpStats, totalCodeNbr = pd.Series(), pd.Series(), pd.Series()
        for _, df in dfs.items():
            # Count non-suspended / listed stocks.
            totalCodeNbr = totalCodeNbr.add(df['volume'] > 0, fill_value=0)

            # Limit-up held to close (one-sided boards excluded via
            # high != low).
            closeChange = df['close'].pct_change()
            closeChange = closeChange[df['high'] != df['low']]
            boolCloseChange = closeChange > DyStockCommon.limitUpPct/100
            limitUpStats = limitUpStats.add(boolCloseChange, fill_value=0)

            # Touched limit-up but not held to close.
            shiftClose = df['close'].shift(1)
            highChange = (df['high'] - shiftClose)/shiftClose
            highChange = highChange[df['high'] != df['low']]
            boolHighChange = highChange > DyStockCommon.limitUpPct/100
            boolHighChange = boolHighChange & -boolCloseChange
            nokLimitUpStats = nokLimitUpStats.add(boolHighChange, fill_value=0)

            progress.update()

        info.print('完成统计封板数据', DyLogData.ind)

        # Sum first to remove zero-division cases.
        limitUpSum = limitUpStats + nokLimitUpStats
        boolLimitSum = limitUpSum > 0

        # remove 0
        limitUpSum = limitUpSum[boolLimitSum]
        limitUpStats = limitUpStats[boolLimitSum]
        nokLimitUpStats = nokLimitUpStats[boolLimitSum]
        totalCodeNbr = totalCodeNbr[boolLimitSum]

        # limit-up ratio
        limitUpRatio = limitUpStats/limitUpSum*100

        # limit-up over total stocks
        limitUpTotalRatio = limitUpStats/totalCodeNbr*100

        # rename
        nokLimitUpStats.name = '未封板数'
        limitUpStats.name = '封板数'
        limitUpRatio.name = '封板率(%)'
        limitUpTotalRatio.name = '封板数占总比(%)'

        return pd.concat([nokLimitUpStats, limitUpStats, limitUpRatio, limitUpTotalRatio], axis=1)

    def getVolatility(df):
        """
        Volatility based on the previous close — similar to True Range but
        expressed as ratios.

        @return: volatility Series (%), NaNs dropped
        """
        preCloses = df['close'].shift(1)
        highVolatility = (df['high'] - preCloses)/preCloses
        lowVolatility = (df['low'] - preCloses)/preCloses
        highLowVolatility = highVolatility - lowVolatility

        trueVolatility = pd.concat([highVolatility, lowVolatility, highLowVolatility], axis=1)
        trueVolatility = trueVolatility.abs()
        trueVolatility = trueVolatility.max(axis=1)
        trueVolatility *= 100

        return trueVolatility.dropna()

    def getChipDistByDays(df, ohlcRatio=40, gridNbr=60):
        """
        Chip (holding-cost) distribution from daily bars.

        @df: daily DF
        @ohlcRatio: %, share of each day's volume assigned to the four
                    OHLC prices
        @gridNbr: number of price grid points for the remaining volume
        @return: Series, index is price, value is volume (shares)
        """
        dfs = []

        # Split ratio% of each day's volume equally across the four OHLC
        # prices.
        ratio = ohlcRatio/100
        volumes = df['volume']*(ratio/4)
        for col in ['open', 'high', 'low', 'close']:
            df_ = pd.concat([df[col].rename('price'), volumes], axis=1)
            dfs.append(df_)

        # Spread the remaining (1-ratio)% equally across the price grid.
        gridSeries = (df['high'] - df['low'])/gridNbr
        volumes = df['volume']*((1 - ratio)/(gridNbr - 1))
        for i in range(1, gridNbr):
            s = df['low'] + gridSeries*i
            s.name = 'price'
            df_ = pd.concat([s, volumes], axis=1)
            dfs.append(df_)

        # concat all
        df = pd.concat(dfs, axis=0)

        s = df['volume'].groupby(df['price']).sum()
        s.sort_index(inplace=True)

        return s

    def getChipDistByTicks(df):
        """
        Chip (holding-cost) distribution from ticks.

        @df: ticks DF
        @return: Series, index is price, value is volume (shares; tick
                 volume is in lots, hence *100)
        """
        s = df['volume'].groupby(df['price']).sum()
        s *= 100
        s.sort_index(inplace=True)

        return s

    def isMasLong(maDf, diffLong=True):
        """
        Whether the MAs are continuously in a long (bullish) arrangement.

        @maDf: DF of MAs with sorted columns
        @diffLong: True - also require the short-MA spreads to dominate
                   the long-MA spreads, e.g. (ma10 - ma20) >= (ma20 - ma30);
                   False - skip that check

        NOTE(review): the local name ``bool`` shadows the builtin — rename
        on the next refactor.
        """
        maColumns = maDf.columns

        # MA long arrangement: any row with a shorter MA below a longer
        # one fails the check.
        longs = None
        diffList = []
        for i in range(len(maColumns) - 1):
            diff = maDf[maColumns[i]] - maDf[maColumns[i+1]]

            bool = diff < 0  # violation of long arrangement
            if longs is None:
                longs = bool
            else:
                longs &= bool

            if longs.sum() > 0:
                return False

            diffList.append(diff)

        # MA spread long arrangement.
        longs = None
        if diffLong:
            for i in range(len(diffList) - 1):
                bool = diffList[i+1] > diffList[i]  # violation of spread arrangement
                if longs is None:
                    longs = bool
                else:
                    longs &= bool

                if longs.sum() > 0:
                    return False

        return True

    def getMasLong(maDf, diffLong=True):
        """
        Number of consecutive days (counting back from the latest trading
        day) the MAs have been in a long (bullish) arrangement.

        @maDf: DF of MAs with sorted columns
        @diffLong: True - also require short-MA spreads to dominate
                   long-MA spreads, e.g. (ma10 - ma20) >= (ma20 - ma30);
                   False - skip that check
        @return: consecutive day count from the most recent trading day
        """
        maColumns = maDf.columns

        # MA long arrangement per row.
        longs = None
        diffList = []
        for i in range(len(maColumns) - 1):
            diff = maDf[maColumns[i]] - maDf[maColumns[i+1]]

            bool = diff >= 0  # long arrangement
            if longs is None:
                longs = bool
            else:
                longs &= bool

            diffList.append(diff)

        # MA spread long arrangement per row.
        if diffLong:
            for i in range(len(diffList) - 1):
                bool = diffList[i] >= diffList[i+1]  # spread long arrangement
                if longs is None:
                    longs = bool
                else:
                    longs &= bool

        # Count the trailing run of long days: idxmin on the reversed
        # series finds the most recent False.
        start = longs.iloc[::-1].idxmin()
        longs = longs[start:]
        nbr = len(longs) - 1
        if nbr == 0:
            # Either every day is MA-long or none is; disambiguate via the
            # last value.
            if longs[-1]:
                nbr = len(maDf)

        return nbr