Python pandas.Panel() Examples

The following code examples show how to use pandas.Panel(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: QuantStudio   Author: Scorpi000   File: Correlation.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_end__(self):
        """Finalize the correlation results once the run is over.

        Returns 0 immediately when the module was never started. Otherwise a
        last-period table and a full-sample table are derived per test factor,
        every Panel is flattened to a DataFrame through
        ``to_frame(filter_observations=False).reset_index()``, and the raw
        "收益率" / "因子值" buffers are removed from the output.

        Returns:
            int: always 0.
        """
        if not self._isStarted:
            return 0
        super().__QS_end__()
        Output = self._Output
        FactorIDs = Output.pop("因子ID")
        PriceIDs = Output.pop("证券ID")
        nFactorID = len(FactorIDs)
        Rolling = Output["滚动相关性"]
        # The largest key of the first factor's rolling results marks the last period.
        LastDT = max(Rolling[self.TestFactors[0]])
        LastCorr, FullCorr = {}, {}
        Output["最后一期相关性"], Output["全样本相关性"] = LastCorr, FullCorr
        for iFactor in self.TestFactors:
            LastCorr[iFactor] = Rolling[iFactor][LastDT].T
            # Correlate the column-wise concatenation of factor values and
            # returns, then keep rows [:nFactorID] against the remaining columns.
            iJoint = pd.DataFrame(np.c_[Output["因子值"][iFactor], Output["收益率"]])
            iCorr = iJoint.corr(method=self.CorrMethod, min_periods=self.MinSummaryWindow).values
            FullCorr[iFactor] = iCorr[:nFactorID, nFactorID:].T
            iPanel = pd.Panel(Rolling[iFactor], major_axis=FactorIDs, minor_axis=PriceIDs)
            iFrame = iPanel.swapaxes(0, 2).to_frame(filter_observations=False).reset_index()
            Rolling[iFactor] = iFrame
            iFrame.columns = ["因子ID", "时点"]+PriceIDs
        # Both summary dictionaries are flattened the same way.
        for iKey in ("最后一期相关性", "全样本相关性"):
            iPanel = pd.Panel(Output[iKey], major_axis=PriceIDs, minor_axis=FactorIDs)
            iFrame = iPanel.swapaxes(0, 1).to_frame(filter_observations=False).reset_index()
            Output[iKey] = iFrame
            iFrame.columns = ["因子", "因子ID"]+PriceIDs
        Output.pop("收益率")
        Output.pop("因子值")
        return 0
Example 2
Project: QuantStudio   Author: Scorpi000   File: HDF5RDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_readFactorData__(self, dts, ids=None):
        """Read the stored factor data for ``dts`` from the HDF5 file.

        Args:
            dts: time points to read; ones missing from the file are skipped
                while reading and reintroduced by the final ``.loc`` selection.
            ids: optional IDs to keep on the minor axis.

        Returns:
            pd.Panel with items=factors, major_axis=dts, minor_axis=IDs. An
            empty-valued Panel is returned when nothing was found or when none
            of ``ids`` is present.
        """
        Frames = {}
        with self._RiskDB._DataLock:
            FilePath = self._RiskDB.MainDir+os.sep+self._Name+"."+self._RiskDB._Suffix
            with h5py.File(FilePath, mode="r") as File:
                Group = File["FactorData"]
                for iDT in dts:
                    iKey = iDT.strftime("%Y-%m-%d %H:%M:%S.%f")
                    if iKey in Group:
                        iNode = Group[iKey]
                        # Stored as ID x factor; transposed to factor x ID.
                        Frames[iDT] = pd.DataFrame(iNode["Data"][...], index=iNode["ID"][...], columns=iNode["Factor"][...]).T
        if not Frames:
            EmptyIDs = [] if ids is None else ids
            return pd.Panel(items=[], major_axis=dts, minor_axis=EmptyIDs)
        Result = pd.Panel(Frames).swapaxes(0, 1).loc[:, dts, :]
        if ids is None:
            return Result
        if Result.minor_axis.intersection(ids).shape[0]>0:
            return Result.loc[:, :, ids]
        # None of the requested IDs is stored: hand back an all-empty Panel of the right shape.
        return pd.Panel(items=Result.items, major_axis=dts, minor_axis=ids)
Example 3
Project: QuantStudio   Author: Scorpi000   File: HDF5RDB.py    GNU General Public License v3.0 6 votes vote down vote up
def readData(self, data_item, dts):
        """Read a named data item for ``dts`` from the HDF5 file.

        Args:
            data_item: name of the top-level group to read.
            dts: time points to read; ones missing from the file are skipped
                while reading.

        Returns:
            None when the group does not exist or holds none of ``dts``;
            otherwise a DataFrame (index=dts) when the last record read was
            a Series, or a Panel (items=dts) when it was a DataFrame.
        """
        Records = {}
        with self._RiskDB._DataLock:
            FilePath = self._RiskDB.MainDir+os.sep+self._Name+"."+self._RiskDB._Suffix
            with h5py.File(FilePath, mode="r") as File:
                if data_item not in File:
                    return None
                Group = File[data_item]
                for iDT in dts:
                    iKey = iDT.strftime("%Y-%m-%d %H:%M:%S.%f")
                    if iKey not in Group:
                        continue
                    iNode = Group[iKey]
                    # A "columns" dataset marks a two-dimensional record.
                    isFrame = "columns" in iNode
                    if isFrame:
                        Records[iDT] = pd.DataFrame(iNode["Data"][...], index=iNode["index"][...], columns=iNode["columns"][...])
                    else:
                        Records[iDT] = pd.Series(iNode["Data"][...], index=iNode["index"][...])
        if not Records:
            return None
        if isFrame:
            return pd.Panel(Records).loc[dts]
        return pd.DataFrame(Records).T.loc[dts]
Example 4
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 6 votes vote down vote up
def readCov(self, dts, ids=None):
        """Read covariance matrices for ``dts``, using cached entries where available.

        Args:
            dts: time points to read.
            ids: optional IDs restricting both axes of every matrix; ``None``
                keeps whatever is stored.

        Returns:
            pd.Panel with items=time points, major_axis=IDs, minor_axis=IDs.
        """
        NonCachedDTs, Cov = [], {}
        for iDT in dts:
            iCacheData = self.ErgodicMode._CacheData.get(iDT)
            if iCacheData is None:
                NonCachedDTs.append(iDT)
                continue
            if ids is not None: Cov[iDT] = iCacheData["Cov"].loc[ids, ids]
            else: Cov[iDT] = iCacheData["Cov"]
        # Time points absent from the cache are read through the backend.
        if NonCachedDTs: Cov.update(dict(self.__QS_readCov__(dts=NonCachedDTs, ids=ids)))
        if not Cov: return pd.Panel(items=dts, major_axis=ids, minor_axis=ids)
        Cov = pd.Panel(Cov).loc[dts]
        # Items are time points, so the ID filter has to address the major and
        # minor axes; a two-element indexer would select items/major_axis instead.
        if ids is not None: Cov = Cov.loc[:, ids, ids]
        return Cov
    # 读取相关系数矩阵, Panel(items=[时点], major_axis=[ID], minor_axis=[ID]) 
Example 5
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 6 votes vote down vote up
def end(self):
        """Leave ergodic mode and drop every reference it was holding.

        Does nothing when ergodic mode is not started. Otherwise ``None`` is
        put on the queue to the sub-process (presumably a stop sentinel —
        confirm against the consumer) and the cache, queues, process handle,
        current time point and memory-mapped cache are all reset to ``None``.

        Returns:
            int: always 0.
        """
        Mode = self.ErgodicMode
        if Mode._isStarted:
            Mode._CacheData = None
            Mode._Queue2SubProcess.put(None)
            for iAttr in ("_Queue2SubProcess", "_Queue2MainProcess", "_CacheDataProcess"):
                setattr(Mode, iAttr, None)
            Mode._isStarted = False
            Mode._CurDT = None
            self._MMAPCacheData = None
        return 0

# 多因子风险数据库基类, 即风险矩阵可以分解成 V=X*F*X'+D 的模型, 其中 D 是对角矩阵, 必须存储的数据有:
# 因子风险矩阵: FactorCov(F), Panel(items=[时点], major_axis=[因子], minor_axis=[因子])
# 特异性风险: SpecificRisk(D), DataFrame(index=[时点], columns=[ID])
# 因子截面数据: FactorData(X), Panel(items=[因子], major_axis=[时点], minor_axis=[ID])
# 因子收益率: FactorReturn, DataFrame(index=[时点], columns=[因子])
# 特异性收益率: SpecificReturn, DataFrame(index=[时点], columns=[ID])
# 可选存储的数据有:
# 回归统计量: Statistics, {"tValue":Series(data=统计量, index=[因子]),"FValue":double,"rSquared":double,"rSquared_Adj":double} 
Example 6
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 6 votes vote down vote up
def readCov(self, dts, ids=None):
        """Assemble covariance matrices for ``dts`` from the factor model.

        For cached time points each matrix is computed as X * F * X' + diag(s**2)
        from the factor data X, the factor covariance F and the specific risk s;
        the remaining time points are read through ``__QS_readCov__``.

        Args:
            dts: time points to read.
            ids: optional IDs; when ``None`` the index of the specific-risk
                row is used.

        Returns:
            pd.Panel with items=dts and an ID x ID frame per item.
        """
        Data = {}
        CachedDTs = sorted(set(dts).intersection(self.ErgodicMode._CacheData))
        if CachedDTs:
            FactorCov = self.readFactorCov(dts=CachedDTs)
            FactorData = self.readFactorData(dts=CachedDTs, ids=ids)
            SpecificRisk = self.readSpecificRisk(dts=CachedDTs, ids=ids)
            for iDT in FactorCov:
                iExposure = FactorData.loc[:, iDT]
                if ids is None:
                    iIDs = SpecificRisk.loc[iDT].index
                    iExposure = iExposure.loc[iIDs]
                else:
                    iIDs = ids
                iX = iExposure.values
                iSystematic = np.dot(np.dot(iX, FactorCov[iDT].values), iX.T)
                iCov = iSystematic + np.diag(SpecificRisk.loc[iDT].values**2)
                Data[iDT] = pd.DataFrame(iCov, index=iIDs, columns=iIDs)
        NewDTs = sorted(set(dts).difference(self.ErgodicMode._CacheData))
        if NewDTs:
            Data.update(dict(self.__QS_readCov__(dts=NewDTs, ids=ids)))
        return pd.Panel(Data).loc[dts]
    # 读取因子风险矩阵 
Example 7
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 6 votes vote down vote up
def readFactorData(self, dts, ids=None):
        """Read factor data for ``dts``, using cached entries where available.

        Args:
            dts: time points to read.
            ids: optional IDs to keep on the minor axis.

        Returns:
            pd.Panel with items=factors, major_axis=dts, minor_axis=IDs; an
            item-less Panel when nothing could be read.
        """
        Frames, Missing = {}, []
        for iDT in dts:
            iCached = self.ErgodicMode._CacheData.get(iDT)
            if iCached is not None:
                iExposure = iCached["FactorData"]
                if ids is not None:
                    iExposure = iExposure.loc[ids]
                Frames[iDT] = iExposure.T
            else:
                Missing.append(iDT)
        if Missing:
            # The backend returns factor-first; swap so that time points become the dict keys.
            Loaded = self.__QS_readFactorData__(dts=Missing, ids=ids)
            Frames.update(dict(Loaded.swapaxes(0, 1)))
        if not Frames:
            return pd.Panel(major_axis=dts, minor_axis=ids)
        Result = pd.Panel(Frames).loc[dts].swapaxes(0, 1)
        return Result if ids is None else Result.loc[:, :, ids]
    # 读取因子收益率 
Example 8
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 6 votes vote down vote up
def readData(self, data_item, dts):
        """Read one JSON-encoded data item for ``dts`` from the SQL table.

        Args:
            data_item: column holding the JSON payloads.
            dts: time points to select.

        Returns:
            None when no row matches; a DataFrame (index=dts) when the payloads
            are series; otherwise a Panel (items=dts).
        """
        # NOTE(review): data_item and the table name are concatenated straight
        # into the statement, so they must come from trusted code, never user input.
        SQLStr = "SELECT DateTime, "+data_item+" "
        SQLStr += "FROM "+self._DBTableName+" "
        SQLStr += "WHERE "+data_item+" IS NOT NULL "
        SQLStr += "AND ("+genSQLInCondition("DateTime", [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts], is_str=True, max_num=1000)+") "
        Data = {}
        Type = None
        for iDT, iData in self._RiskDB.fetchall(SQLStr):
            if Type is None:
                # The payload kind is not stored: try a frame first and fall
                # back to a series. Only ordinary errors trigger the fallback,
                # so KeyboardInterrupt/SystemExit still propagate.
                try:
                    Data[iDT] = pd.read_json(iData, orient="split")
                    Type = "frame"
                except Exception:
                    Type = "series"
                    Data[iDT] = pd.read_json(iData, orient="split", typ=Type)
            else:
                Data[iDT] = pd.read_json(iData, orient="split", typ=Type)
        if not Data: return None
        if Type=="series": return pd.DataFrame(Data).T.loc[dts]
        else: return pd.Panel(Data).loc[dts]
Example 9
Project: QuantStudio   Author: Scorpi000   File: TushareDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Aggregate raw rows per (date, ID) and align the result to ``dts``/``ids``.

        Args:
            raw_data: DataFrame with "日期" and "ID" columns plus one column per factor.
            factor_names: factors to return, in order.
            ids: IDs for the minor axis.
            dts: time points for the major axis.
            args: per-call overrides, read under the keys "算子" and "回溯天数".

        Returns:
            pd.Panel with items=factor_names, major_axis=dts, minor_axis=ids.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        Operator = args.get("算子", self.Operator)
        if Operator is None:
            # Default aggregation: collect each group's values in a list.
            Operator = (lambda x: x.tolist())
        Frames = {iName: Indexed[iName].groupby(axis=0, level=[0, 1]).apply(Operator).unstack() for iName in factor_names}
        Result = pd.Panel(Frames).loc[factor_names, :, ids]
        Result.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Result.major_axis]
        LookBack = args.get("回溯天数", self.LookBack)
        if LookBack==0:
            return Result.loc[:, dts, ids]
        # Add the requested time points to the axis so the look-back fill can reach them.
        AllDTs = Result.major_axis.union(dts).sort_values()
        Result = Result.loc[:, AllDTs, ids]
        # Look-back days scaled by 24*3600 (presumably seconds — confirm in fillNaByLookback).
        Limits = LookBack*24.0*3600
        for i, _ in enumerate(Result.items):
            Result.iloc[i] = fillNaByLookback(Result.iloc[i], lookback=Limits)
        return Result.loc[:, dts]
Example 10
Project: QuantStudio   Author: Scorpi000   File: ArcticDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Read the requested factors symbol by symbol from the Arctic library.

        Args:
            raw_data: unused here.
            factor_names: factors to return, in order.
            ids: IDs to read; each one is looked up as a symbol.
            dts: time points, passed to the library as ``chunk_range``.
            args: unused here.

        Returns:
            pd.Panel with items=factor_names and minor_axis=ids; an empty
            Panel over ``dts`` when no symbol provided any requested factor.
        """
        Frames = {}
        for iID in ids:
            if not self._Lib.has_symbol(iID):
                continue
            iMeta = self._Lib.read_metadata(symbol=iID)
            # Maps factor name -> stored column name, limited to the requested factors.
            iCols = pd.Series(iMeta["Cols"], index=iMeta["FactorNames"])
            iCols = iCols.loc[iCols.index.intersection(factor_names)]
            if iCols.shape[0]==0:
                continue
            iFrame = self._Lib.read(symbol=iID, columns=iCols.values.tolist(), chunk_range=pd.DatetimeIndex(dts), filter_data=True)
            iFrame.columns = iCols.index
            Frames[iID] = iFrame
        if not Frames:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        return pd.Panel(Frames).swapaxes(0, 2).loc[factor_names, :, ids]
            

# 基于 Arctic 数据库的因子数据库
# 使用 CHUNKSTORE
# 每张表是一个 library, 每个 ID 是一个 Symbol
# 每个 Symbol 存储一个 DataFrame: index 是时间点, columns 是因子
# 因子描述信息存储在特殊的 Symbol: _FactorInfo 中
# 因子表的描述信息存储在 Symbol: _FactorInfo 的 metadata 中 
Example 11
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Pivot the raw rows into one date x ID frame per factor and align them.

        Args:
            raw_data: DataFrame with "日期" and "ID" columns plus one column per factor.
            factor_names: factors to return, in order.
            ids: IDs for the result.
            dts: time points for the result.
            args: per-call overrides, read under the keys "回溯天数",
                "只起始日回溯" and "只回溯非目标日".

        Returns:
            Whatever ``self._adjustDataDTID`` returns for the assembled Panel,
            or an empty Panel when ``raw_data`` has no rows.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        Frames = {}
        for iName in Indexed.columns:
            iFrame = Indexed[iName].unstack()
            # Only "double" columns are cast; every other type is kept as read.
            if _identifyDataType(self._FactorInfo.loc[iName, "DataType"])=="double":
                iFrame = iFrame.astype("float")
            Frames[iName] = iFrame
        Result = pd.Panel(Frames).loc[factor_names]
        LookBack = args.get("回溯天数", self.LookBack)
        OnlyStart = args.get("只起始日回溯", self.OnlyStartLookBack)
        OnlyNontarget = args.get("只回溯非目标日", self.OnlyLookBackNontarget)
        return self._adjustDataDTID(Result, LookBack, factor_names, ids, dts, OnlyStart, OnlyNontarget)


# 信息发布表, 表结构特征:
# 公告日期, 表示信息发布的时点;
# 截止日期, 表示信息有效的时点;
# 如果不忽略公告日期, 则以截止日期和公告日期的最大值作为数据填充的时点, 同一填充时点存在多个截止日期时以最大截止日期的记录值填充
# 如果忽略公告日期, 则以截止日期作为数据填充的时点, 必须保证截至日期具有唯一性
# 条件字段, 作为条件过滤记录; 可能存在多个条件字段
# 在设定某些条件下, 数据填充时点和 ID 可以唯一标志一行记录
# 先填充表中已有的数据, 然后根据回溯天数参数填充缺失的时点 
Example 12
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Aggregate raw rows per (date, ID) with an operator and align the result.

        Args:
            raw_data: DataFrame with "日期" and "ID" columns plus one column per factor.
            factor_names: factors to return, in order.
            ids: IDs for the minor axis.
            dts: time points for the result.
            args: per-call overrides, read under the keys "算子", "回溯天数",
                "只起始日回溯" and "只回溯非目标日".

        Returns:
            Whatever ``self._adjustDataDTID`` returns for the assembled Panel,
            or an empty Panel when ``raw_data`` has no rows.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        Operator = args.get("算子", self.Operator)
        if Operator is None:
            # Default aggregation: collect each group's values in a list.
            Operator = (lambda x: x.tolist())
        Frames = {iName: Indexed[iName].groupby(axis=0, level=[0, 1]).apply(Operator).unstack() for iName in factor_names}
        Result = pd.Panel(Frames).loc[factor_names, :, ids]
        LookBack = args.get("回溯天数", self.LookBack)
        OnlyStart = args.get("只起始日回溯", self.OnlyStartLookBack)
        OnlyNontarget = args.get("只回溯非目标日", self.OnlyLookBackNontarget)
        return self._adjustDataDTID(Result, LookBack, factor_names, ids, dts, OnlyStart, OnlyNontarget)


# 窄因子表, 表结构特征:
# 因子字段: 字段值为因子名
# 因子值字段: 字段值为因子值 
Example 13
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Pivot a narrow (factor-name / factor-value) table into a factor Panel.

        Args:
            raw_data: DataFrame with "QS_DT", "ID" and the factor-name column;
                the first remaining column is taken as the value.
            factor_names: factors to return, in order.
            ids: IDs for the result.
            dts: time points for the result.
            args: per-call overrides, read under the keys "因子值字段",
                "因子字段", "回溯天数" and "只起始日回溯".

        Returns:
            Whatever ``self._adjustDataDTID`` returns for the assembled Panel,
            or an empty Panel when there are no rows or none of
            ``factor_names`` occurs in the data.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        FactorValueField = args.get("因子值字段", self.FactorValueField)
        FactorField = args.get("因子字段", self.FactorField)
        Values = raw_data.set_index(["QS_DT", "ID", FactorField]).iloc[:, 0]
        # After unstacking, each factor name is a column indexed by (QS_DT, ID).
        Wide = Values.unstack()
        isDouble = (_identifyDataType(self._FactorInfo.loc[FactorValueField, "DataType"])=="double")
        Frames = {}
        for iName in factor_names:
            if iName not in Wide:
                continue
            iFrame = Wide[iName].unstack()
            Frames[iName] = iFrame.astype("float") if isDouble else iFrame
        if not Frames:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Result = pd.Panel(Frames).loc[factor_names]
        LookBack = args.get("回溯天数", self.LookBack)
        OnlyStart = args.get("只起始日回溯", self.OnlyStartLookBack)
        return self._adjustDataDTID(Result, LookBack, factor_names, ids, dts, OnlyStart)
Example 14
Project: QuantStudio   Author: Scorpi000   File: TinySoftDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_prepareRawData__(self, factor_names, ids, dts, args={}):
        """Query ``tradetable`` through TinySoft for every ID and stack the results.

        Args:
            factor_names: factors to fetch; mapped to database field names.
            ids: IDs to query; the dot-separated parts are reversed and
                concatenated to form the remote code.
            dts: time points; only the first and last bound the query.
            args: unused here.

        Returns:
            pd.Panel with items=factor_names, major_axis=decoded datetimes,
            minor_axis=IDs; an empty Panel when no ID returned data.

        Raises:
            __QS_Error__: when the remote execution reports a non-zero error code.
        """
        Fields = self._FactorDB._FactorInfo["DBFieldName"].loc[self.Name].loc[factor_names].tolist()
        CodeStr = "".join([
            "return select "+"['date'],['"+("'],['".join(Fields))+"'] ",
            "from tradetable datekey inttodate("+dts[0].strftime("%Y%m%d")+") ",
            "to (inttodate("+dts[-1].strftime("%Y%m%d")+")+0.9999) of '{ID}' end;",
        ])
        Frames = {}
        for iID in ids:
            iCodeStr = CodeStr.format(ID="".join(iID.split(".")[::-1]))
            ErrorCode, iData, Msg = self._FactorDB._TSLPy.RemoteExecute(iCodeStr, {})
            if ErrorCode!=0:
                raise __QS_Error__("TinySoft 执行错误: "+Msg.decode("gbk"))
            if iData:
                Frames[iID] = pd.DataFrame(iData).set_index([b"date"])
        if not Frames:
            return pd.Panel(Frames)
        Result = pd.Panel(Frames).swapaxes(0, 2)
        Result.major_axis = [dt.datetime(*self._FactorDB._TSLPy.DecodeDateTime(iDT)) for iDT in Result.major_axis]
        # Column labels may come back as GBK-encoded bytes.
        Result.items = [(iCol.decode("gbk") if isinstance(iCol, bytes) else iCol) for iCol in Result.items]
        Result = Result.loc[Fields]
        Result.items = factor_names
        return Result
Example 15
Project: QuantStudio   Author: Scorpi000   File: TinySoftDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_prepareRawData__(self, factor_names, ids, dts, args={}):
        """Query ``markettable`` through TinySoft for every ID at the configured cycle.

        Args:
            factor_names: factors to fetch; mapped to database field names.
            ids: IDs to query; the dot-separated parts are reversed and
                concatenated to form the remote code.
            dts: time points; only the first and last bound the query.
            args: per-call overrides, read under the keys "周期" and "周期单位".

        Returns:
            pd.Panel with items=factor_names, major_axis=decoded datetimes,
            minor_axis=IDs; an empty Panel when no ID returned data.

        Raises:
            __QS_Error__: when the remote execution reports a non-zero error code.
        """
        CycleStr = self._genCycleStr(args.get("周期", self.Cycle), args.get("周期单位", self.CycleUnit))
        Fields = self._FactorDB._FactorInfo["DBFieldName"].loc[self.Name].loc[factor_names].tolist()
        CodeStr = "".join([
            "SetSysParam(pn_cycle(),"+CycleStr+");",
            "return select "+"['date'],['"+"'],['".join(Fields)+"'] ",
            "from markettable datekey inttodate("+dts[0].strftime("%Y%m%d")+") ",
            "to (inttodate("+dts[-1].strftime("%Y%m%d")+")+0.9999) of '{ID}' end;",
        ])
        Frames = {}
        for iID in ids:
            iCodeStr = CodeStr.format(ID="".join(iID.split(".")[::-1]))
            ErrorCode, iData, Msg = self._FactorDB._TSLPy.RemoteExecute(iCodeStr, {})
            if ErrorCode!=0:
                raise __QS_Error__("TinySoft 执行错误: "+Msg.decode("gbk"))
            if iData:
                Frames[iID] = pd.DataFrame(iData).set_index([b"date"])
        if not Frames:
            return pd.Panel(Frames)
        Result = pd.Panel(Frames).swapaxes(0, 2)
        Result.major_axis = [dt.datetime(*self._FactorDB._TSLPy.DecodeDateTime(iDT)) for iDT in Result.major_axis]
        # Column labels may come back as GBK-encoded bytes.
        Result.items = [(iCol.decode("gbk") if isinstance(iCol, bytes) else iCol) for iCol in Result.items]
        Result = Result.loc[Fields]
        Result.items = factor_names
        return Result
Example 16
Project: QuantStudio   Author: Scorpi000   File: WindDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_readData__(self, factor_names=None, ids=None, dts=None, args={}):
        """Read factor data from the raw table and align it to ``dts``.

        Args:
            factor_names: factors to read; ``None`` means ``self.FactorNames``.
            ids: optional IDs to keep on the minor axis.
            dts: optional sequence of datetimes; its first and last entries
                bound the raw query. When falsy the query range is left open.
            args: per-call overrides, read under the keys "缺失填充" and "回溯天数".

        Returns:
            The Panel returned by ``adjustDateTime`` (items=factor_names),
            restricted to ``ids`` when given.
        """
        if dts: StartDate, EndDate = dts[0].date(), dts[-1].date()
        else: StartDate, EndDate = None, None
        FillNa = args.get("缺失填充", self.FillNa)
        # Widen the window only when a start date exists: with no dts the range
        # is open-ended and None cannot be shifted by a timedelta.
        if FillNa and (StartDate is not None): StartDate -= dt.timedelta(args.get("回溯天数", self.LookBack))
        if factor_names is None: factor_names = self.FactorNames
        RawData = self._getRawData(factor_names, ids, StartDate, EndDate, args=args)
        RawData = RawData.set_index(["日期", "ID"])
        DataType = self.getFactorMetaData(factor_names=factor_names, key="DataType")
        Data = {}
        for iFactorName in RawData.columns:
            iRawData = RawData[iFactorName].unstack()
            if DataType[iFactorName]=="double":
                iRawData = iRawData.astype("float")
            Data[iFactorName] = iRawData
        Data = pd.Panel(Data).loc[factor_names]
        # Raw dates are "%Y%m%d" strings; convert them to datetimes.
        Data.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Data.major_axis]
        Data = adjustDateTime(Data, dts, fillna=FillNa, method="pad")
        if ids is not None: Data = Data.loc[:, :, ids]
        return Data
Example 17
Project: QuantStudio   Author: Scorpi000   File: WindDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_readData__(self, factor_names=None, ids=None, dts=None, args={}):
        """Build 0/1 membership flags per index ID and align them to ``dts``.

        Each raw row gives an ID with an inclusion date ("纳入日期") and an
        optional removal date ("剔除日期"). The ID is flagged 1 from the
        inclusion date up to the day before removal, or up to today when the
        removal date is None.

        Args:
            factor_names: index IDs to build flags for; ``None`` means
                ``self.FactorNames``.
            ids: passed through to ``self._getRawData``.
            dts: optional sequence of datetimes bounding the date range; when
                falsy the range runs from the earliest inclusion date to today.
            args: passed through to ``self._getRawData``.

        Returns:
            The Panel returned by ``adjustDateTime`` (items=factor_names).
        """
        if dts: StartDate, EndDate = dts[0].date(), dts[-1].date()
        else: StartDate, EndDate = None, None
        if factor_names is None: factor_names = self.FactorNames
        RawData = self._getRawData(factor_names, ids, start_date=StartDate, end_date=EndDate, args=args)
        if StartDate is None:
            StartDate = dt.datetime.strptime(np.min(RawData["纳入日期"].values), "%Y%m%d").date()
            DateSeries = getDateSeries(StartDate, dt.date.today())
        else:
            DateSeries = getDateSeries(dts[0].date(), dts[-1].date())
        Data = {}
        for iIndexID in factor_names:
            iRawData = RawData[RawData["指数ID"]==iIndexID].set_index(["ID"])
            iData = pd.DataFrame(0, index=DateSeries, columns=pd.unique(iRawData.index))
            for jID in iData.columns:
                jIDRawData = iRawData.loc[[jID]]
                for k in range(jIDRawData.shape[0]):
                    kStartDate = dt.datetime.strptime(jIDRawData["纳入日期"].iloc[k], "%Y%m%d").date()
                    kEndDate = (dt.datetime.strptime(jIDRawData["剔除日期"].iloc[k], "%Y%m%d").date()-dt.timedelta(1) if jIDRawData["剔除日期"].iloc[k] is not None else dt.date.today())
                    # One .loc call so the write lands on iData itself rather
                    # than on an intermediate column object (chained assignment).
                    iData.loc[kStartDate:kEndDate, jID] = 1
            Data[iIndexID] = iData
        Data = pd.Panel(Data).loc[factor_names]
        Data.major_axis = [dt.datetime.combine(iDate, dt.time(0)) for iDate in Data.major_axis]
        # IDs absent from an index come out as NaN in the Panel; treat them as non-members.
        Data.fillna(value=0, inplace=True)
        return adjustDateTime(Data, dts, fillna=True, method="bfill")
Example 18
Project: QuantStudio   Author: Scorpi000   File: SQLDB.py    GNU General Public License v3.0 6 votes vote down vote up
def _adjustData(data, look_back, factor_names, ids, dts):
    """Select factors/IDs/time points from ``data`` and fill gaps by look-back.

    Args:
        data: anything ``pd.Panel`` accepts (items=factors).
        look_back: 0 disables filling, ``inf`` forward-fills without limit,
            any other value is multiplied by 24*3600 and handed to
            ``fillNaByLookback``.
        factor_names: factors to keep, in order.
        ids: IDs to keep on the minor axis, or ``None`` for all.
        dts: time points to keep on the major axis, or ``None`` for all.

    Returns:
        pd.Panel restricted to the requested factors, time points and IDs.
    """
    MinorSel = slice(None) if ids is None else ids
    data = pd.Panel(data).loc[factor_names, :, MinorSel]
    hasDTs = dts is not None
    if look_back!=0:
        if hasDTs:
            # Add the requested time points so the fill can reach them.
            AllDTs = data.major_axis.union(dts).sort_values()
            data = data.loc[:, AllDTs, :]
        if np.isinf(look_back):
            for i, _ in enumerate(data.items):
                data.iloc[i].fillna(method="pad", inplace=True)
        else:
            Frames = dict(data)
            Limits = look_back*24.0*3600
            Filled = {iName: fillNaByLookback(iFrame, lookback=Limits) for iName, iFrame in Frames.items()}
            data = pd.Panel(Filled).loc[factor_names]
    return data.loc[:, dts] if hasDTs else data
Example 19
Project: QuantStudio   Author: Scorpi000   File: SQLDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Turn raw SQL rows into a factor Panel, honouring the value type.

        Args:
            raw_data: DataFrame with "QS_DT" and "ID" columns plus one column per factor.
            factor_names: factors to return, in order.
            ids: IDs for the result.
            dts: time points for the result.
            args: per-call overrides, read under the keys "因子值类型",
                "筛选条件" and "回溯天数".

        Returns:
            An empty Panel when there are no rows; the result of
            ``self._calcListData`` for list-valued factors (or when a
            non-"scalar" type has duplicate keys); otherwise the result of
            ``_adjustData``.

        Raises:
            __QS_Error__: when the value type is "scalar" but (QS_DT, ID) is not unique.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        raw_data = raw_data.set_index(["QS_DT", "ID"])
        ValueType = args.get("因子值类型", self.ValueType)
        if ValueType=="list":
            return self._calcListData(raw_data, factor_names, ids, dts, args=args)
        if not raw_data.index.is_unique:
            if ValueType!="scalar":
                # Unspecified type with duplicate keys: fall back to list values.
                return self._calcListData(raw_data, factor_names, ids, dts, args=args)
            FilterStr = args.get("筛选条件", self.FilterCondition)
            raise __QS_Error__("筛选条件: '%s' 无法保证唯一性!" % FilterStr)
        DataType = self.getFactorMetaData(factor_names=factor_names, key="DataType")
        Frames = {}
        for iName in raw_data.columns:
            iFrame = raw_data[iName].unstack()
            Frames[iName] = iFrame.astype("float") if DataType[iName]=="double" else iFrame
        return _adjustData(Frames, args.get("回溯天数", self.LookBack), factor_names, ids, dts)
Example 20
Project: QuantStudio   Author: Scorpi000   File: WindDB2.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Pivot raw rows into a factor Panel and fill gaps within the look-back window.

        Args:
            raw_data: DataFrame with "日期" ("%Y%m%d" strings) and "ID" columns
                plus one column per factor.
            factor_names: factors to return, in order.
            ids: IDs for the minor axis.
            dts: time points for the major axis.
            args: per-call overrides, read under the key "回溯天数".

        Returns:
            pd.Panel with items=factor_names, major_axis=dts, minor_axis=ids.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        DataType = self.getFactorMetaData(factor_names=factor_names, key="DataType")
        Frames = {}
        for iName in Indexed.columns:
            iFrame = Indexed[iName].unstack()
            Frames[iName] = iFrame.astype("float") if DataType[iName]=="double" else iFrame
        Result = pd.Panel(Frames).loc[factor_names]
        Result.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Result.major_axis]
        LookBack = args.get("回溯天数", self.LookBack)
        if LookBack==0:
            return Result.loc[:, dts, ids]
        # Add the requested time points to the axis so the look-back fill can reach them.
        AllDTs = Result.major_axis.union(dts).sort_values()
        Result = Result.loc[:, AllDTs, ids]
        # Look-back days scaled by 24*3600 (presumably seconds — confirm in fillNaByLookback).
        Limits = LookBack*24.0*3600
        for i, _ in enumerate(Result.items):
            Result.iloc[i] = fillNaByLookback(Result.iloc[i], lookback=Limits)
        return Result.loc[:, dts]
Example 21
Project: QuantStudio   Author: Scorpi000   File: WindDB2.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Aggregate raw rows per (date, ID) and fill gaps within the look-back window.

        Args:
            raw_data: DataFrame with "日期" ("%Y%m%d" strings) and "ID" columns
                plus one column per factor; every remaining column is aggregated.
            factor_names: factors to return, in order.
            ids: IDs for the minor axis.
            dts: time points for the major axis.
            args: per-call overrides, read under the keys "算子" and "回溯天数".

        Returns:
            pd.Panel with items=factor_names, major_axis=dts, minor_axis=ids.
        """
        if raw_data.shape[0]==0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        Operator = args.get("算子", self.Operator)
        if Operator is None:
            # Default aggregation: collect each group's values in a list.
            Operator = (lambda x: x.tolist())
        Frames = {iName: Indexed[iName].groupby(axis=0, level=[0, 1]).apply(Operator).unstack() for iName in Indexed.columns}
        Result = pd.Panel(Frames).loc[factor_names, :, ids]
        Result.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Result.major_axis]
        LookBack = args.get("回溯天数", self.LookBack)
        if LookBack==0:
            return Result.loc[:, dts, :]
        # Add the requested time points to the axis so the look-back fill can reach them.
        AllDTs = Result.major_axis.union(dts).sort_values()
        Result = Result.loc[:, AllDTs, ids]
        # Look-back days scaled by 24*3600 (presumably seconds — confirm in fillNaByLookback).
        Limits = LookBack*24.0*3600
        for i, _ in enumerate(Result.items):
            Result.iloc[i] = fillNaByLookback(Result.iloc[i], lookback=Limits)
        return Result.loc[:, dts]
Example 22
Project: QuantStudio   Author: Scorpi000   File: WindDB2.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Build 0/1 membership flags per group ID and align them to ``dts``.

        Each raw row gives an ID with an inclusion date and an optional
        removal date. The ID is flagged 1 from the inclusion date up to the
        day before removal, or up to today when the removal date is None.

        Args:
            raw_data: DataFrame holding the columns named by ``self._GroupField``,
                ``self._IDField``, ``self._InDateField`` and ``self._OutDateField``.
            factor_names: group IDs to build flags for, in order.
            ids: IDs for the minor axis.
            dts: non-empty sequence of datetimes; first and last bound the date range.
            args: unused here.

        Returns:
            The Panel returned by ``adjustDateTime`` (items=factor_names), or an
            all-zero Panel when none of ``ids`` appears in the data.
        """
        StartDate, EndDate = dts[0].date(), dts[-1].date()
        DateSeries = getDateSeries(StartDate, EndDate)
        Data = {}
        for iIndexID in factor_names:
            iRawData = raw_data[raw_data[self._GroupField]==iIndexID].set_index([self._IDField])
            iData = pd.DataFrame(0, index=DateSeries, columns=pd.unique(iRawData.index))
            for jID in iData.columns:
                jIDRawData = iRawData.loc[[jID]]
                for k in range(jIDRawData.shape[0]):
                    kStartDate = dt.datetime.strptime(jIDRawData[self._InDateField].iloc[k], "%Y%m%d").date()
                    kEndDate = (dt.datetime.strptime(jIDRawData[self._OutDateField].iloc[k], "%Y%m%d").date()-dt.timedelta(1) if jIDRawData[self._OutDateField].iloc[k] is not None else dt.date.today())
                    # One .loc call so the write lands on iData itself rather
                    # than on an intermediate column object (chained assignment).
                    iData.loc[kStartDate:kEndDate, jID] = 1
            Data[iIndexID] = iData
        Data = pd.Panel(Data)
        if Data.minor_axis.intersection(ids).shape[0]==0: return pd.Panel(0.0, items=factor_names, major_axis=dts, minor_axis=ids)
        Data = Data.loc[factor_names, :, ids]
        Data.major_axis = [dt.datetime.combine(iDate, dt.time(0)) for iDate in Data.major_axis]
        # IDs absent from a group come out as NaN in the Panel; treat them as non-members.
        Data.fillna(value=0, inplace=True)
        return adjustDateTime(Data, dts, fillna=True, method="bfill")
Example 23
Project: recruit   Author: Frank-qlu   File: test_concat.py    Apache License 2.0 6 votes vote down vote up
def test_panel_concat_buglet(self, sort):
        """Concatenating Panels with disjoint axis labels must not raise (#2257)."""
        n_rows, n_cols = 5, 3

        def random_frame():
            return DataFrame(np.random.randn(n_rows, n_cols),
                             index=["I%s" % r for r in range(n_rows)],
                             columns=["C%s" % c for c in range(n_cols)])

        def make_panel():
            return Panel({"Item%s" % tag: random_frame()
                          for tag in ['A', 'B', 'C']})

        def add_suffix(label):
            return '%s_1' % label

        panel1 = make_panel()
        panel2 = make_panel()

        # Rename once through a mapping and then again through a callable.
        mapping = {label: "%s_1" % label for label in panel2.major_axis}
        panel2 = panel2.rename(major_axis=mapping)

        panel3 = panel2.rename(major_axis=add_suffix)
        panel3 = panel3.rename(minor_axis=add_suffix)

        # it works!
        concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
Example 24
Project: arctic   Author: man-group   File: _pandas_ndarray_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def can_write_type(data):
        """Return whether ``data`` is a ``Panel`` instance."""
        is_panel = isinstance(data, Panel)
        return is_panel
Example 25
Project: arctic   Author: man-group   File: _pandas_ndarray_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
        """Reject the request: appending is not supported for Panels.

        Raises:
            ValueError: always.
        """
        message = 'Appending not supported for pandas.Panel'
        raise ValueError(message)
Example 26
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def panel(i1, i2, i3):
    """Build an ``i1`` x ``i2`` x ``i3`` Panel of random values.

    Items are ``range(i1)``, the major axis is labelled 'A0', 'A1', ... and
    the minor axis holds ``i3`` daily dates starting at 1970-01-01.
    """
    values = np.random.randn(i1, i2, i3)
    items = range(i1)
    major_labels = ['A%d' % i for i in range(i2)]
    minor_dates = list(rrule(DAILY, count=i3, dtstart=dt(1970, 1, 1), interval=1))
    return Panel(values, items, major_labels, minor_dates)
Example 27
Project: arctic   Author: man-group   File: test_pandas_store.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def test_panel_save_read_with_nans(library):
    '''Round-trip a Panel whose items differ in row count; NaN rows must survive.'''
    columns = ['c1', 'c2']
    short_frame = DataFrame(data=np.arange(4).reshape((2, 2)), index=['r1', 'r2'], columns=columns)
    long_frame = DataFrame(data=np.arange(6).reshape((3, 2)), index=['r1', 'r2', 'r3'], columns=columns)
    p_in = Panel(data={'i1': short_frame, 'i2': long_frame})

    library.write('pandas', p_in)
    p_out = library.read('pandas').data

    assert p_in.shape == p_out.shape
    # check_names is False because pandas helpfully names the axes for us.
    for position in (0, 1):
        assert_frame_equal(p_in.iloc[position], p_out.iloc[position], check_names=False)
Example 28
Project: https-github.com-ZhengyaoJiang-PGPortfolio   Author: stevep2007   File: data.py    GNU General Public License v3.0 5 votes vote down vote up
def panel_fillna(panel, type="bfill"):
    """
    fill nan along the 3rd axis (axis 1 of every item frame)
    :param panel: the panel to be filled
    :param type: a fillna method such as bfill or ffill, or "both" to apply
        bfill first and then ffill
    :return: a new panel built from the filled item frames
    """
    methods = ("bfill", "ffill") if type == "both" else (type,)
    filled = {}
    for item in panel.items:
        frame = panel.loc[item]
        for method in methods:
            frame = frame.fillna(axis=1, method=method)
        filled[item] = frame
    return pd.Panel(filled)
Example 29
Project: ldpop   Author: popgenmethods   File: proposal.py    MIT License 5 votes vote down vote up
def __init__(self, n, theta, rhos, pop_sizes,
                 times, numTimePointsPerEpoch, processes=1):
        """Compute a likelihood table per rho and store them all in ``self.panel``.

        Args:
            n: half the number of haplotypes; ``2*n`` sizes the state space and
                is stored as ``num_haps``.
            theta: passed to every worker task and stored on the instance.
            rhos: iterable of rho values; one table is computed per value.
            pop_sizes: passed to every worker task and stored on the instance.
            times: epoch times, converted to interval lengths for the workers
                and stored on the instance.
            numTimePointsPerEpoch: passed to every worker task.
            processes: size of the worker pool.
        """
        start = time.time()
        epochLengths = epochTimesToIntervalLengths(times)
        # All possible configs
        states = MoranStatesFinite(2*n)
        moranRates = MoranRates(states)

        executor = Pool(processes)
        try:
            likelihoodDictList = list(map(
                states.ordered_log_likelihoods,
                executor.map(ordered_wrapper, [(moranRates, rho, theta,
                                                pop_sizes, epochLengths,
                                                numTimePointsPerEpoch)
                                               for rho in rhos])))
        finally:
            # Release the worker processes even when a task raised.
            executor.close()
            executor.join()

        data = {}
        for rho, likelihoodDict in zip(rhos, likelihoodDictList):
            timeList = sorted(likelihoodDict)
            index, rows = [], []
            # The configs recorded at time 0.0 define the row order.
            for config in sorted(likelihoodDict[0.0].keys()):
                config_dict = dict(config)
                # NOTE(review): ``haps`` is not defined in this method;
                # presumably a module-level sequence — confirm.
                index.append(' '.join([str(config_dict[hap]) for hap in haps]))
                # Stored values are log-likelihoods; the table holds likelihoods.
                rows.append([math.exp(likelihoodDict[disc_time][config])
                             for disc_time in timeList])
            data[rho] = pandas.DataFrame(rows, index=index, columns=timeList)

        self.panel = pandas.Panel(data)
        end = time.time()
        logging.info('Computed lookup table in %f seconds ' % (end-start))
        self.num_haps = 2*n
        self.theta = theta
        self.pop_sizes = pop_sizes
        self.times = times

    # TODO: fix w.r.t. dealing with pandas stuff above 
Example 30
Project: QuantStudio   Author: Scorpi000   File: Regression.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finalize the regression test and reshape the collected statistics into tables.

        Builds the last-period statistics, the full-sample OLS statistics (one
        regression per security column of the returns) and the rolling
        statistics, then removes the raw inputs from ``self._Output``.

        Returns:
            0 in every case (also when the module was never started).
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        FactorIDs, PriceIDs = self._Output.pop("因子ID"), self._Output.pop("证券ID")
        # Sorted keys of the rolling t-statistic container.
        DTs = sorted(self._Output["滚动统计量"]["t统计量"])
        # Last-period statistics: the last entry of each rolling series.
        self._Output["最后一期统计量"] = pd.DataFrame({"R平方": self._Output["滚动统计量"]["R平方"][-1], "调整R平方": self._Output["滚动统计量"]["调整R平方"][-1],
                                                      "F统计量": self._Output["滚动统计量"]["F统计量"][-1]}, index=PriceIDs).loc[:, ["R平方", "调整R平方", "F统计量"]]
        # Row index of the t-statistic tables: (factor, factor ID) pairs, with a
        # leading ("Constant", "Constant") row when a constant term is used.
        Index = pd.MultiIndex.from_product([self.TestFactors, FactorIDs], names=["因子", "因子ID"])
        if self.Constant: Index = Index.insert(0, ("Constant", "Constant"))
        self._Output["最后一期t统计量"] = pd.DataFrame(self._Output["滚动统计量"]["t统计量"][DTs[-1]], index=Index, columns=PriceIDs).reset_index()
        self._Output["全样本统计量"], self._Output["全样本t统计量"] = pd.DataFrame(index=PriceIDs, columns=["R平方", "调整R平方", "F统计量"]), pd.DataFrame(index=Index, columns=PriceIDs)
        X = self._Output["因子值"]
        if self.Constant: X = sm.add_constant(X, prepend=True)
        # Full-sample regression of each return column on the factor values.
        for i, iID in enumerate(PriceIDs):
            Y = self._Output["收益率"][:, i]
            try:
                Result = sm.OLS(Y, X, missing="drop").fit()
                self._Output["全样本统计量"].iloc[i, 0] = Result.rsquared
                self._Output["全样本统计量"].iloc[i, 1] = Result.rsquared_adj
                self._Output["全样本统计量"].iloc[i, 2] = Result.fvalue
                self._Output["全样本t统计量"].iloc[:, i] = Result.tvalues
            except Exception:
                # Best effort: a failed fit leaves this security's cells empty.
                # Narrowed from a bare except so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                pass
        # Rolling scalar statistics become DataFrames indexed by time point.
        self._Output["滚动统计量"]["R平方"] = pd.DataFrame(self._Output["滚动统计量"]["R平方"], index=DTs, columns=PriceIDs)
        self._Output["滚动统计量"]["调整R平方"] = pd.DataFrame(self._Output["滚动统计量"]["调整R平方"], index=DTs, columns=PriceIDs)
        self._Output["滚动统计量"]["F统计量"] = pd.DataFrame(self._Output["滚动统计量"]["F统计量"], index=DTs, columns=PriceIDs)
        # Rolling t-statistics: Panel -> long DataFrame with one column per security.
        self._Output["滚动t统计量"] = pd.Panel(self._Output["滚动统计量"].pop("t统计量"), major_axis=Index, minor_axis=PriceIDs)
        self._Output["滚动t统计量"] = self._Output["滚动t统计量"].swapaxes(0, 2).to_frame(filter_observations=False).reset_index()
        self._Output["滚动t统计量"].columns = ["因子", "因子ID", "时点"]+PriceIDs
        # Drop the raw inputs from the output.
        self._Output.pop("收益率"), self._Output.pop("因子值")
        return 0 
Example 31
Project: QuantStudio   Author: Scorpi000   File: Spread.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finalize the cointegration test: last-period, full-sample and rolling results.

        The full-sample test reuses the last-period result when the summary
        window is infinite, the last tested time point equals ``self._iDT`` and
        no ID filter is set; otherwise every pair of price columns is tested
        with ``sm.tsa.stattools.coint`` on the rows where both are non-null.

        Returns:
            0 in every case (also when the module was never started).
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        DTs = sorted(self._Output["统计量"])
        self._Output["最后一期检验"] = {"统计量": self._Output["统计量"][DTs[-1]], "p值": self._Output["p值"][DTs[-1]]}
        Price = self._Output.pop("价格")
        if np.isinf(self.SummaryWindow) and (DTs[-1]==self._iDT) and (not self.IDFilter):
            self._Output["全样本检验"] = deepcopy(self._Output["最后一期检验"])
        else:
            Mask = pd.notnull(Price)
            # Symmetric result matrices; untested or failed pairs stay NaN.
            Statistics, pValue = np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan), np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan)
            for i in range(Price.shape[1]):
                for j in range(i+1, Price.shape[1]):
                    ijMask = (Mask[:, i] & Mask[:, j])
                    try:
                        iRslt = sm.tsa.stattools.coint(Price[:,i][ijMask], Price[:,j][ijMask], **self.CointArgs)
                        Statistics[i, j] = Statistics[j, i] = iRslt[0]
                        pValue[i, j] = pValue[j, i] = iRslt[1]
                    except Exception:
                        # Best effort: a failed test leaves NaN for this pair.
                        # Narrowed from a bare except so that KeyboardInterrupt
                        # and SystemExit are no longer swallowed.
                        pass
            self._Output["全样本检验"] = {"统计量": pd.DataFrame(Statistics, index=self._IDs, columns=self._IDs), "p值": pd.DataFrame(pValue, index=self._IDs, columns=self._IDs)}
        # Rolling results: Panel -> long DataFrame, first two columns renamed below.
        self._Output["滚动检验"] = {"统计量": pd.Panel(self._Output.pop("统计量")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index(),
                                    "p值": pd.Panel(self._Output.pop("p值")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index()}
        Cols = self._Output["滚动检验"]["统计量"].columns.tolist()
        Cols[0], Cols[1] = "时点", "ID"
        self._Output["滚动检验"]["统计量"].columns = self._Output["滚动检验"]["p值"].columns = Cols
        return 0 
Example 32
Project: QuantStudio   Author: Scorpi000   File: Difference.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finalize the group-difference test and reshape all results into long tables.

        Computes full-sample pairwise t-tests between quantile groups for each
        security, converts the rolling and full-sample results to DataFrames,
        removes the rows that compare a group with itself, and extracts the
        last-period rows.

        Returns:
            0 in every case (also when the module was never started).
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        FactorData, Return, PriceIDs = self._Output.pop("因子值"), self._Output.pop("收益率"), self._Output.pop("证券ID")
        self._Output["全样本t统计量"], self._Output["全样本p值"] = {}, {}
        # Mask[:, j] flags the observations whose factor value lies in quantile
        # group j; cut points are percentiles of the whole FactorData array.
        # NOTE(review): the boolean indexing assumes FactorData is 1-D -- confirm.
        Mask = np.full(shape=(FactorData.shape[0], self.GroupNum), fill_value=False)
        for j in range(self.GroupNum):
            if j==0: Mask[FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100), j] = True
            else: Mask[((FactorData>np.percentile(FactorData, j/self.GroupNum*100)) & (FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100))), j] = True
        # Full-sample t-test (unequal variances, NaNs omitted) for every pair of groups.
        for i, iID in enumerate(PriceIDs):
            itStat, ipValue = np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan), np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan)
            for j in range(self.GroupNum):
                for k in range(j+1, self.GroupNum):
                    jkResult = stats.ttest_ind(Return[Mask[:, j], i], Return[Mask[:, k], i], equal_var=False, nan_policy="omit")
                    itStat[j, k], ipValue[j, k] = jkResult.statistic, jkResult.pvalue
                    # Mirror into the lower triangle: t changes sign, p is unchanged.
                    itStat[k, j], ipValue[k, j] = -itStat[j, k], ipValue[j, k]
            self._Output["全样本t统计量"][iID], self._Output["全样本p值"][iID] = itStat, ipValue
        # Sorted keys of the first security's rolling t-statistic container.
        DTs = sorted(self._Output["滚动t统计量"][PriceIDs[0]])
        # Per security: rolling results -> long frame, full-sample matrix -> stacked Series.
        for iID in PriceIDs:
            self._Output["滚动t统计量"][iID] = pd.Panel(self._Output["滚动t统计量"][iID]).to_frame(filter_observations=False)
            self._Output["滚动p值"][iID] = pd.Panel(self._Output["滚动p值"][iID]).to_frame(filter_observations=False)
            self._Output["全样本t统计量"][iID] = pd.DataFrame(self._Output["全样本t统计量"][iID]).stack(dropna=False)
            self._Output["全样本p值"][iID] = pd.DataFrame(self._Output["全样本p值"][iID]).stack(dropna=False)
        # Combine all securities, name the index levels, and drop the rows
        # that compare a group with itself.
        self._Output["滚动t统计量"] = pd.Panel(self._Output["滚动t统计量"]).to_frame(filter_observations=False)
        self._Output["滚动t统计量"].index.names = ["分位数组1", "分位数组2", "时点"]
        self._Output["滚动t统计量"] = self._Output["滚动t统计量"].reset_index()
        self._Output["滚动t统计量"] = self._Output["滚动t统计量"][self._Output["滚动t统计量"]["分位数组1"]!=self._Output["滚动t统计量"]["分位数组2"]]
        self._Output["滚动p值"] = pd.Panel(self._Output["滚动p值"]).to_frame(filter_observations=False)
        self._Output["滚动p值"].index.names = ["分位数组1", "分位数组2", "时点"]
        self._Output["滚动p值"] = self._Output["滚动p值"].reset_index()
        self._Output["滚动p值"] = self._Output["滚动p值"][self._Output["滚动p值"]["分位数组1"]!=self._Output["滚动p值"]["分位数组2"]]
        self._Output["全样本t统计量"] = pd.DataFrame(self._Output["全样本t统计量"]).reset_index()
        self._Output["全样本p值"] = pd.DataFrame(self._Output["全样本p值"]).reset_index()
        self._Output["全样本t统计量"].columns = self._Output["全样本p值"].columns = ["分位数组1", "分位数组2"]+PriceIDs
        self._Output["全样本t统计量"] = self._Output["全样本t统计量"][self._Output["全样本t统计量"]["分位数组1"]!=self._Output["全样本t统计量"]["分位数组2"]]
        self._Output["全样本p值"] = self._Output["全样本p值"][self._Output["全样本p值"]["分位数组1"]!=self._Output["全样本p值"]["分位数组2"]]
        # Last-period tables: rows of the rolling tables at the final time
        # point, with the time-point column removed afterwards.
        self._Output["最后一期t统计量"] = self._Output["滚动t统计量"][self._Output["滚动t统计量"]["时点"]==DTs[-1]]
        self._Output["最后一期p值"] = self._Output["滚动p值"][self._Output["滚动p值"]["时点"]==DTs[-1]]
        # NOTE(review): pop() is applied to a filtered selection of the rolling
        # table; depending on the pandas version this may emit a
        # SettingWithCopy warning -- confirm.
        self._Output["最后一期t统计量"].pop("时点")
        self._Output["最后一期p值"].pop("时点")
        return 0 
Example 33
Project: QuantStudio   Author: Scorpi000   File: FactorDBDlg.py    GNU General Public License v3.0 5 votes vote down vote up
def on_CSVImportButton_clicked(self):
        """Import factor data from a CSV file into the table/factor selected in the tree.

        The target is derived from the tree selection: with nothing selected
        the user is asked for a new table name and the factor is called
        "NewFactor"; with a top-level item selected the user is asked for a
        new factor name in that table; with a child item selected the item is
        the factor and its parent the table.  Selecting more than one item is
        rejected with an error box.

        The widget is disabled while the file is loaded and written, and is
        always re-enabled afterwards; any exception raised while loading or
        writing is shown in an error box.

        Returns:
            0 in every case.
        """
        SelectedItems = self.FactorDBTree.selectedItems()
        nSelectedItems = len(SelectedItems)
        if (nSelectedItems>1):
            QMessageBox.critical(self, "错误", "请选择一张表或一个因子!")
            return 0
        elif nSelectedItems==0:
            # Ask for a new table name
            isOk, TableName = self.getNewTableName()
            if not isOk: return 0
            NewFactorName = "NewFactor"
        else:
            if SelectedItems[0].parent() is None:
                TableName = SelectedItems[0].text(0)
                isOk, NewFactorName = self.getNewFactorName(TableName)
                if not isOk: return 0
            else:
                TableName = SelectedItems[0].parent().text(0)
                NewFactorName = SelectedItems[0].text(0)
        FilePath = QFileDialog.getOpenFileName(parent=self, caption="导入CSV", directory=".", filter="csv (*.csv)")[0]
        if not FilePath: return 0
        # Only ask for a merge mode when the factor already exists in the table.
        if (TableName in self.FactorDB.TableNames) and (NewFactorName in self.FactorDB.getTable(TableName).FactorNames):
            if_exists, isOk = QInputDialog.getItem(self, "因子合并", "因子合并方式:", ["replace", "append", "update"], editable=False)
            if not isOk: return 0
        else:
            if_exists = "update"
        self.setEnabled(False)
        try:
            # Loading happens inside the try so that a malformed file is
            # reported like any other failure instead of leaving the widget
            # disabled.
            FactorData = loadCSVFactorData(FilePath)
            self.FactorDB.writeData(pd.Panel({NewFactorName:FactorData}), TableName, if_exists=if_exists)
            self.populateFactorDBTree()
            QMessageBox.information(self, '完成', '导入数据完成!')
        except Exception as e:
            QMessageBox.critical(self, "错误", str(e))
        finally:
            self.setEnabled(True)
        return 0 
Example 34
Project: QuantStudio   Author: Scorpi000   File: HDF5RDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readFactorCov__(self, dts):
        """Read the factor covariance matrices stored for the given time points.

        Opens the risk database's HDF5 file read-only while holding the
        database lock, and for each time point that has an entry under the
        "FactorCov" group builds a square DataFrame labelled by the stored
        factor names.

        Returns:
            ``pd.Panel`` with ``dts`` as items; an empty Panel with those
            items when nothing was found.
        """
        CovByDT = {}
        with self._RiskDB._DataLock, h5py.File(self._RiskDB.MainDir+os.sep+self._Name+"."+self._RiskDB._Suffix, mode="r") as H5File:
            CovGroup = H5File["FactorCov"]
            for iDT in dts:
                # Entries are keyed by the formatted time point.
                iKey = iDT.strftime("%Y-%m-%d %H:%M:%S.%f")
                if iKey in CovGroup:
                    iEntry = CovGroup[iKey]
                    iNames = iEntry["Factor"][...]
                    CovByDT[iDT] = pd.DataFrame(iEntry["Data"][...], index=iNames, columns=iNames)
        if not CovByDT:
            return pd.Panel(items=dts)
        return pd.Panel(CovByDT).loc[dts]
Example 35
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readCov__(self, dts, ids=None):
        """Return an empty Panel with ``dts`` as items and ``ids`` on both other axes."""
        EmptyCov = pd.Panel(items=dts, major_axis=ids, minor_axis=ids)
        return EmptyCov
    # Read covariance matrices, Panel(items=[time points], major_axis=[IDs], minor_axis=[IDs])
Example 36
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readCov__(self, dts, ids=None):
        """Assemble security covariance matrices from the factor model pieces.

        For every time point in the factor covariance, computes
        ``B * F * B' + diag(s**2)`` where ``B`` is the factor data for that
        time point, ``F`` the factor covariance and ``s`` the specific risk.
        When ``ids`` is None the IDs of the specific-risk row are used.

        Returns:
            ``pd.Panel`` of the per-time-point matrices, selected by ``dts``.
        """
        Covs = self.__QS_readFactorCov__(dts=dts)
        Exposure = self.__QS_readFactorData__(dts=dts, ids=ids)
        SpecRisk = self.__QS_readSpecificRisk__(dts=dts, ids=ids)
        Result = {}
        for iDT in Covs:
            if ids is None:
                iIDs = SpecRisk.loc[iDT].index
                iExposure = Exposure.loc[:, iDT].loc[iIDs]
            else:
                iIDs, iExposure = ids, Exposure.loc[:, iDT]
            iLoadings = iExposure.values
            iSystematic = np.dot(np.dot(iLoadings, Covs[iDT].values), iLoadings.T)
            iSpecific = np.diag(SpecRisk.loc[iDT].values**2)
            Result[iDT] = pd.DataFrame(iSystematic + iSpecific, index=iIDs, columns=iIDs)
        return pd.Panel(Result).loc[dts]
Example 37
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readFactorCov__(self, dts):
        """Return an empty Panel whose items are ``dts``."""
        EmptyCov = pd.Panel(items=dts)
        return EmptyCov
Example 38
Project: QuantStudio   Author: Scorpi000   File: RiskDB.py    GNU General Public License v3.0 5 votes vote down vote up
def readFactorCov(self, dts):
        """Read factor covariance matrices, using cached entries where available.

        Time points found in ``self.ErgodicMode._CacheData`` are served from
        their "FactorCov" entry; the remaining ones are read in one call to
        ``__QS_readFactorCov__``.

        Returns:
            ``pd.Panel`` selected by ``dts``; an empty Panel with ``dts`` as
            items when nothing was found.
        """
        Found, Missing = {}, []
        for iDT in dts:
            iEntry = self.ErgodicMode._CacheData.get(iDT)
            if iEntry is not None:
                Found[iDT] = iEntry["FactorCov"]
            else:
                Missing.append(iDT)
        if Missing:
            Found.update(dict(self.__QS_readFactorCov__(dts=Missing)))
        if Found:
            return pd.Panel(Found).loc[dts]
        return pd.Panel(items=dts)
    # Read specific risk
Example 39
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readCov__(self, dts, ids=None):
        """Read covariance matrices for ``dts`` from the SQL table.

        Each fetched row holds a JSON-encoded matrix (``orient="split"``)
        whose row labels are set to its column labels.  When ``ids`` is
        given, the matrix is restricted to ``ids`` if it shares at least one
        label with them, otherwise it is replaced by an empty ``ids`` x
        ``ids`` frame.

        Returns:
            ``pd.Panel`` selected by ``dts``; an empty Panel with the
            requested axes when no row was fetched.
        """
        DTStrs = [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts]
        SQLStr = "".join([
            "SELECT DateTime, Cov ",
            "FROM "+self._DBTableName+" ",
            "WHERE ("+genSQLInCondition("DateTime", DTStrs, is_str=True, max_num=1000)+") ",
        ])
        CovByDT = {}
        for iDT, iJSON in self._RiskDB.fetchall(SQLStr):
            iFrame = pd.read_json(iJSON, orient="split")
            iFrame.index = iFrame.columns
            if ids is not None:
                iHasOverlap = iFrame.index.intersection(ids).shape[0] > 0
                iFrame = iFrame.loc[ids, ids] if iHasOverlap else pd.DataFrame(index=ids, columns=ids)
            CovByDT[iDT] = iFrame
        if not CovByDT:
            return pd.Panel(items=dts, major_axis=ids, minor_axis=ids)
        return pd.Panel(CovByDT).loc[dts]
Example 40
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readFactorCov__(self, dts):
        """Read the non-null factor covariance matrices for ``dts`` from the SQL table.

        Each fetched row holds a JSON-encoded matrix (``orient="split"``)
        whose row labels are set to its column labels.

        Returns:
            ``pd.Panel`` selected by ``dts``; an empty Panel with ``dts`` as
            items when no row was fetched.
        """
        DTStrs = [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts]
        SQLStr = "".join([
            "SELECT DateTime, FactorCov ",
            "FROM "+self._DBTableName+" ",
            "WHERE FactorCov IS NOT NULL ",
            "AND ("+genSQLInCondition("DateTime", DTStrs, is_str=True, max_num=1000)+") ",
        ])
        CovByDT = {}
        for iDT, iJSON in self._RiskDB.fetchall(SQLStr):
            iFrame = pd.read_json(iJSON, orient="split")
            iFrame.index = iFrame.columns
            CovByDT[iDT] = iFrame
        if not CovByDT:
            return pd.Panel(items=dts)
        return pd.Panel(CovByDT).loc[dts]
Example 41
Project: QuantStudio   Author: Scorpi000   File: SQLRDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_readFactorData__(self, dts, ids=None):
        """Read the non-null factor data for ``dts`` from the SQL table.

        Each fetched row holds a JSON-encoded frame (``orient="split"``),
        which is transposed; the frames are stacked into a Panel whose first
        two axes are then swapped and whose major axis is selected by ``dts``.
        When ``ids`` is given, the minor axis is restricted to ``ids`` if it
        shares at least one label with them, otherwise an empty Panel with the
        same items is returned.
        """
        DTStrs = [iDT.strftime("%Y-%m-%d %H:%M:%S.%f") for iDT in dts]
        SQLStr = "".join([
            "SELECT DateTime, FactorData ",
            "FROM "+self._DBTableName+" ",
            "WHERE FactorData IS NOT NULL ",
            "AND ("+genSQLInCondition("DateTime", DTStrs, is_str=True, max_num=1000)+") ",
        ])
        FrameByDT = {iDT: pd.read_json(iJSON, orient="split").T for iDT, iJSON in self._RiskDB.fetchall(SQLStr)}
        if not FrameByDT:
            return pd.Panel(items=[], major_axis=dts, minor_axis=ids)
        Exposure = pd.Panel(FrameByDT).swapaxes(0, 1).loc[:, dts, :]
        if ids is None:
            return Exposure
        if Exposure.minor_axis.intersection(ids).shape[0] > 0:
            return Exposure.loc[:, :, ids]
        return pd.Panel(items=Exposure.items, major_axis=dts, minor_axis=ids)
Example 42
Project: QuantStudio   Author: Scorpi000   File: TushareDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Turn long-format raw data into a Panel of factors x time points x IDs.

        ``raw_data`` is indexed by its "日期" and "ID" columns and every
        remaining column is unstacked into a frame; columns whose metadata
        type is "double" are cast to float.  The major axis labels are parsed
        as ``%Y%m%d`` dates before the result is selected by ``dts`` and
        ``ids``.  An empty ``raw_data`` yields an empty Panel with the
        requested axes.
        """
        if raw_data.shape[0] == 0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        DataType = self.getFactorMetaData(factor_names=factor_names, key="DataType")
        Frames = {}
        for iFactorName in Indexed.columns:
            iFrame = Indexed[iFactorName].unstack()
            Frames[iFactorName] = iFrame.astype("float") if DataType[iFactorName]=="double" else iFrame
        Result = pd.Panel(Frames).loc[factor_names]
        Result.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Result.major_axis]
        return Result.loc[:, dts, ids]
Example 43
Project: QuantStudio   Author: Scorpi000   File: TushareDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Broadcast per-ID values across every time point in ``dts``.

        ``raw_data`` is indexed by its "ID" column.  If none of ``ids`` is
        present an empty Panel with the requested axes is returned; otherwise
        the rows for ``ids`` are repeated along a new time axis and returned
        as a Panel of factors x time points x IDs.
        """
        ByID = raw_data.set_index(["ID"])
        if ByID.index.intersection(ids).shape[0] == 0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        ByID = ByID.loc[ids]
        nIDs, nFactors = ByID.shape
        # (factor, ID, 1) cube repeated len(dts) times along the last axis.
        Cube = ByID.values.T.reshape((nFactors, nIDs, 1)).repeat(len(dts), axis=2)
        Result = pd.Panel(Cube, items=factor_names, major_axis=ids, minor_axis=dts)
        return Result.swapaxes(1, 2)
Example 44
Project: QuantStudio   Author: Scorpi000   File: TushareDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Turn long-format raw data into a Panel, optionally filling gaps by look-back.

        ``raw_data`` is indexed by its "日期" and "ID" columns and unstacked
        per column (columns typed "double" are cast to float); major axis
        labels are parsed as ``%Y%m%d`` dates.  The look-back length in days
        is taken from ``args["回溯天数"]`` and defaults to ``self.LookBack``;
        when it is non-zero each factor is passed through ``fillNaByLookback``
        with the limit expressed in seconds.  Empty input, or no overlap
        between the data's IDs and ``ids``, yields an empty Panel with the
        requested axes.
        """
        if raw_data.shape[0] == 0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        Indexed = raw_data.set_index(["日期", "ID"])
        DataType = self.getFactorMetaData(factor_names=factor_names, key="DataType")
        Frames = {}
        for iFactorName in Indexed.columns:
            iFrame = Indexed[iFactorName].unstack()
            Frames[iFactorName] = iFrame.astype("float") if DataType[iFactorName]=="double" else iFrame
        Data = pd.Panel(Frames).loc[factor_names]
        Data.major_axis = [dt.datetime.strptime(iDate, "%Y%m%d") for iDate in Data.major_axis]
        if Data.minor_axis.intersection(ids).shape[0] == 0:
            return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        LookBack = args.get("回溯天数", self.LookBack)
        if LookBack == 0:
            return Data.loc[:, dts, ids]
        # Extend the time axis with the requested time points before filling.
        AllDTs = Data.major_axis.union(dts).sort_values()
        Data = Data.loc[:, AllDTs, ids]
        LimitSeconds = LookBack*24.0*3600
        for i in range(len(Data.items)):
            Data.iloc[i] = fillNaByLookback(Data.iloc[i], lookback=LimitSeconds)
        return Data.loc[:, dts]

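# Announcement-information table; structural characteristics:
# Announcement date: the time point at which the information becomes available;
# Cutoff date: the time point for which the information is valid; this field may be absent;
# If a cutoff date exists, the later of the cutoff date and the announcement date is used as the data-fill time point; otherwise the announcement date is used;
# The data-fill time point and ID do not uniquely identify a row: for each ID and each data-fill time point there may be several values, which are collected in a list; if the operator argument is not None, the result of applying that operator to the list is the final filled value, otherwise the list itself is filled in;
# Data already present in the table are filled first, then missing time points are filled according to the look-back-days argument 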
Example 45
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_prepareRawData__(self, factor_names, ids, dts, args={}):
        """Default implementation: prepares nothing and returns None."""
        return
    # Interface for computing data; returns: Panel(item=[factors], major_axis=[time points], minor_axis=[IDs])
    # NOTE(review): this trailing comment appears to introduce the next method of the original class.
Example 46
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_calcData__(self, raw_data, factor_names, ids, dts, args={}):
        """Default implementation: computes nothing and returns None."""
        return
    # Read data; returns: Panel(item=[factors], major_axis=[time points], minor_axis=[IDs])
    # NOTE(review): this trailing comment appears to introduce the next method of the original class.
Example 47
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _readData_FactorCacheMode(self, factor_names, ids, dts, args={}):
        """Read factor data through the per-factor cache of the ergodic mode.

        The read counter of every requested factor is incremented first.  If
        caching is disabled, no cache window exists, or ``dts`` extends beyond
        the cached window, the data is computed directly.  Otherwise each
        factor is served from the cache, scheduled for caching (evicting the
        least-read cached factor when the cache is full and the new factor has
        been read more often), or read directly without caching.

        Args:
            factor_names: names of the factors to read.
            ids: IDs to select on the minor axis.
            dts: time points to select on the major axis; ``dts[0]`` and
                ``dts[-1]`` are compared with the cache window bounds.
            args: forwarded to ``__QS_prepareRawData__`` / ``__QS_calcData__``.

        Returns:
            The computed data selected by ``factor_names``.
        """
        self.ErgodicMode._FactorReadNum[factor_names] += 1
        if (self.ErgodicMode.MaxFactorCacheNum<=0) or (not self.ErgodicMode._CacheDTs) or (dts[0]<self.ErgodicMode._CacheDTs[0]) or (dts[-1]>self.ErgodicMode._CacheDTs[-1]):
            # Request cannot be served from the cache: compute it directly.
            return self.__QS_calcData__(raw_data=self.__QS_prepareRawData__(factor_names=factor_names, ids=ids, dts=dts, args=args), factor_names=factor_names, ids=ids, dts=dts, args=args)
        Data = {}
        DataFactorNames = []
        CacheFactorNames = set()
        PopFactorNames = []
        for iFactorName in factor_names:
            iFactorData = self.ErgodicMode._CacheData.get(iFactorName)
            if iFactorData is None:# not cached yet
                if self.ErgodicMode._CacheFactorNum<self.ErgodicMode.MaxFactorCacheNum:# cache has room: schedule this factor for caching
                    self.ErgodicMode._CacheFactorNum += 1
                    CacheFactorNames.add(iFactorName)
                else:# cache is full: inspect the cached factor with the fewest reads
                    CacheFactorReadNum = self.ErgodicMode._FactorReadNum[list(self.ErgodicMode._CacheData.keys())]
                    # idxmin() returns the index label, which is what .loc and
                    # the cache pop below need.  Series.argmin() is deprecated
                    # for that purpose and returns a position in newer pandas.
                    MinReadNumInd = CacheFactorReadNum.idxmin()
                    if CacheFactorReadNum.loc[MinReadNumInd]<self.ErgodicMode._FactorReadNum[iFactorName]:# requested factor is read more often than the least-read cached one: cache it and evict that one
                        CacheFactorNames.add(iFactorName)
                        PopFactor = MinReadNumInd
                        self.ErgodicMode._CacheData.pop(PopFactor)
                        PopFactorNames.append(PopFactor)
                    else:
                        DataFactorNames.append(iFactorName)
            else:
                Data[iFactorName] = iFactorData
        CacheFactorNames = list(CacheFactorNames)
        if CacheFactorNames:
            # Load the newly cached factors for the whole cache window and all cached IDs.
            iData = dict(self.__QS_calcData__(raw_data=self.__QS_prepareRawData__(factor_names=CacheFactorNames, ids=self.ErgodicMode._IDs, dts=self.ErgodicMode._CacheDTs, args=args), factor_names=CacheFactorNames, ids=self.ErgodicMode._IDs, dts=self.ErgodicMode._CacheDTs, args=args))
            Data.update(iData)
            self.ErgodicMode._CacheData.update(iData)
        # Send the cache changes (added and evicted factor names) to the sub-process queue.
        self.ErgodicMode._Queue2SubProcess.put((None, (CacheFactorNames, PopFactorNames)))
        Data = pd.Panel(Data)
        if Data.shape[0]>0: Data = Data.loc[:, dts, ids]
        if not DataFactorNames: return Data.loc[factor_names]
        # Factors that did not fit into the cache are read directly and joined in.
        return self.__QS_calcData__(raw_data=self.__QS_prepareRawData__(factor_names=DataFactorNames, ids=ids, dts=dts, args=args), factor_names=DataFactorNames, ids=ids, dts=dts, args=args).join(Data).loc[factor_names] 
Example 48
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _readData_ErgodicMode(self, factor_names, ids, dts, args={}):
        """Read data in ergodic mode, dispatching on the configured cache mode.

        With cache mode "因子" the request is delegated to
        ``_readData_FactorCacheMode``; otherwise the data of each ID is read
        with ``_readIDData`` and the results are combined into a Panel whose
        first and last axes are swapped.
        """
        if self.ErgodicMode.CacheMode != "因子":
            PerID = {}
            for iID in ids:
                PerID[iID] = self._readIDData(iID, factor_names=factor_names, dts=dts, args=args)
            return pd.Panel(PerID).swapaxes(0, 2)
        return self._readData_FactorCacheMode(factor_names=factor_names, ids=ids, dts=dts, args=args)
    # Start traversal mode; dts: sequence or iterator of the time points to traverse
Example 49
Project: QuantStudio   Author: Scorpi000   File: FactorDB.py    GNU General Public License v3.0 5 votes vote down vote up
def getDateTime(self, iid=None, start_dt=None, end_dt=None):
        """Return the time points available for this factor.

        While an operation mode is attached and started, its ``DateTimes``
        are returned.  Otherwise the request is forwarded to the factor table
        (if any) using this factor's name in the table and ``self.Args``.
        Without a factor table an empty list is returned.
        """
        OperationMode = self._OperationMode
        if (OperationMode is not None) and OperationMode._isStarted:
            return OperationMode.DateTimes
        FactorTable = self._FactorTable
        if FactorTable is None:
            return []
        return FactorTable.getDateTime(ifactor_name=self._NameInFT, iid=iid, start_dt=start_dt, end_dt=end_dt, args=self.Args)
    # -------------------------------- Data reading ---------------------------------
    # Read data; returns: Panel(item=[factors], major_axis=[time points], minor_axis=[IDs])
Example 50
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _adjustDataDTID(self, data, look_back, factor_names, ids, dts, only_start_lookback=False, only_lookback_nontarget=False):
        """Select ``dts`` x ``ids`` from ``data``, filling missing values by look-back.

        Args:
            data: Panel-like object indexed as (factor, time point, ID).
            look_back: look-back length in days; 0 disables filling and
                infinity pads without limit.
            factor_names: factor names used for the returned items.
            ids: IDs to select on the minor axis.
            dts: target time points to select on the major axis.
            only_start_lookback: when true, only data up to ``dts[0]`` is
                filled and only the ``dts[0]`` slice is written back.
            only_lookback_nontarget: when true, per-time-point limits are
                derived so that filling uses data at non-target time points
                only (description taken from the original inline comment).

        Returns:
            The adjusted data selected by ``dts``.  With ``look_back == 0``
            and a KeyError on selection, a warning is logged and an empty
            Panel with the requested axes is returned.
        """
        if look_back==0:
            try:
                return data.loc[:, dts, ids]
            except KeyError:
                self._QS_Logger.warning("待提取的因子数据超出了因子表 '%s' 原始数据的时点或 ID 范围, 将填充缺失值!" % self.Name)
                return pd.Panel(items=factor_names, major_axis=dts, minor_axis=ids)
        # Work on the union of the stored and the requested time points.
        AllDTs = data.major_axis.union(dts).sort_values()
        AdjData = data.loc[:, AllDTs, ids]
        if only_start_lookback:# look back only at the first target time point
            AllAdjData = AdjData
            AdjData = AllAdjData.loc[:, :dts[0], :]
            TargetDTs = dts[:1]
        else:
            TargetDTs = dts
        if only_lookback_nontarget:# fill only from data at non-target time points
            # Builtin bool replaces the np.bool alias, which is deprecated
            # since NumPy 1.20 and removed in 1.24.
            Mask = pd.Series(np.full(shape=(AdjData.shape[1], ), fill_value=False, dtype=bool), index=AdjData.major_axis)
            Mask[TargetDTs] = True
            FillMask = Mask.copy()
            FillMask[Mask.astype("int").diff()!=1] = False
            # Gap in days between consecutive time points on the axis.
            TimeDelta = pd.Series(np.r_[0, np.diff(Mask.index.values) / np.timedelta64(1, "D")], index=Mask.index)
            TimeDelta[(Mask & (~FillMask)) | (Mask.astype("int").diff()==-1)] = 0
            TimeDelta = TimeDelta.cumsum().loc[TargetDTs].diff().fillna(value=0)
            # Per-cell limits: zero at non-target rows, capped at look_back on target rows.
            Limits = pd.DataFrame(0, index=AdjData.major_axis, columns=AdjData.minor_axis)
            Limits.loc[TargetDTs, :] = np.minimum(TimeDelta.values, look_back).reshape((TimeDelta.shape[0], 1)).repeat(Limits.shape[1], axis=1)
            Limits = Limits*24.0*3600
        else:
            # Limit converted from days to seconds.
            Limits = look_back*24.0*3600
        if np.isinf(look_back) and (not only_lookback_nontarget):
            # NOTE(review): relies on AdjData.iloc[i] giving a view so that the
            # in-place pad reaches AdjData; this branch also skips the
            # .loc[factor_names] selection of the other branch -- confirm.
            for i, iFactorName in enumerate(AdjData.items): AdjData.iloc[i].fillna(method="pad", inplace=True)
        else:
            AdjData = dict(AdjData)
            for iFactorName in AdjData: AdjData[iFactorName] = fillNaByLookback(AdjData[iFactorName], lookback=Limits)
            AdjData = pd.Panel(AdjData).loc[factor_names]
        if only_start_lookback:
            AllAdjData.loc[:, dts[0], :] = AdjData.loc[:, dts[0], :]
            return AllAdjData.loc[:, dts]
        else:
            return AdjData.loc[:, dts]