Python pandas.notnull() Examples

The following code examples show how to use pandas.notnull(). They are taken from open source Python projects.
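
Before the project examples, here is a minimal stand-alone sketch (not taken from any of the projects below) of what pandas.notnull() returns for a scalar and for a Series:

import numpy as np
import pandas as pd

pd.notnull(np.nan)                 # False
pd.notnull('some value')           # True
s = pd.Series([1.0, np.nan, 3.0])
pd.notnull(s)                      # element-wise Boolean Series: True, False, True
s[pd.notnull(s)]                   # keeps only the non-missing values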

Example 1
Project: gullikson-scripts   Author: kgullikson88   File: Sensitivity.py   MIT License
def get_sec_spt(row):
    """
    Get the secondary spectral type from the information we have. Meant to be
    called as the `apply` method of a pandas DataFrame.
    """
    if pd.notnull(row['Sp2']):
        return row['Sp2']
    elif pd.notnull(row['Sp1']) and pd.notnull(row['mag1']) and pd.notnull(row['mag2']):
        # TODO: Do better than assuming V band!
        band = 'V'
        absmag_prim = MS.GetAbsoluteMagnitude(row['Sp1'], color=band)
        dm = float(row['mag1']) - absmag_prim
        absmag_sec = float(row['mag2']) - dm
        return MS.GetSpectralType_FromAbsMag(absmag_sec, color=band)[0]
    elif pd.notnull(row['Sp1']) and pd.notnull(row['K1']) and pd.notnull(row['K2']):
        mass = MS.Interpolate('mass', row['Sp1'])
        q = float(row['K1']) / float(row['K2'])
        sec_mass = q * mass
        return MS.GetSpectralType('mass', sec_mass)[0]
    else:
        print(row)
        raise ValueError('Must give enough information to figure out the spectral type!') 
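
As the docstring notes, get_sec_spt is meant to be called through DataFrame.apply. A hypothetical call (the target column name is invented for illustration) might look like:

df['SpT_secondary'] = df.apply(get_sec_spt, axis=1)
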
Example 2
Project: matchminer-engine   Author: dfci   File: utilities.py   GNU Affero General Public License v3.0
def add_matches(trial_matches_df, db):
    """Add the match table to the database or update what already exists theres"""

    if 'clinical_id' in trial_matches_df.columns:
        trial_matches_df['clinical_id'] = trial_matches_df['clinical_id'].apply(lambda x: str(x))

    if 'genomic_id' in trial_matches_df.columns:
        trial_matches_df['genomic_id'] = trial_matches_df['genomic_id'].apply(lambda x: str(x))

    if 'report_date' in trial_matches_df.columns:
        trial_matches_df['report_date'] = trial_matches_df['report_date'].apply(
            lambda x: dt.datetime.strftime(x, '%Y-%m-%d %X') if pd.notnull(x) else x)

    if len(trial_matches_df.index) > 0:
        db.trial_match.drop()
        for i in range(0, trial_matches_df.shape[0], 1000):
            records = json.loads(trial_matches_df[i:i + 1000].T.to_json()).values()
            db.trial_match.insert_many(records) 
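
The report_date conversion above uses pd.notnull() to skip missing timestamps before formatting. A small stand-alone sketch of the same pattern, with made-up data:

import datetime as dt
import pandas as pd

s = pd.Series([dt.datetime(2020, 1, 2, 3, 4, 5), pd.NaT])
s.apply(lambda x: dt.datetime.strftime(x, '%Y-%m-%d %X') if pd.notnull(x) else x)
# the first entry is formatted as '2020-01-02 03:04:05'; the NaT entry is passed through unchanged
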
Example 3
Project: lifestyles   Author: CamDavidsonPilon   File: cbc_hb.py   MIT License
def _create_observation_variable(individual_selections, choices, partsworth):
    """
    This function creates the PyMC3 observation variables. It also gracefully handles missing observations in the individual selections.

    `individual_selections` is a Series of the individual's selections, starting from 0. It can contain NaNs, which represent that no answer was provided.

    `choices` is a DataFrame with a hierarchical index: level=0 enumerates the choices, and level=1 displays the profile at a specific choice.
    Its size is (n_questions, n_choices_per_question).

    `partsworth` is a slice of a PyMC3 matrix. It represents the partsworth variables of an individual. Size is (n_profiles,)

    This computes the values exp(partsworth * profile_j) / sum_k[ exp(partsworth * profile_k) ] for all j.
    """
    nan_mask = pd.notnull(individual_selections)
    return pm.Categorical("Obs_%s" % individual_selections.name,
                          tt.nnet.softmax(tt.stack([
                            tt.dot(choice.values, partsworth) for _, choice in choices[nan_mask.values].groupby(axis=1, level=0)
                          ], axis=0).T),
                          observed=individual_selections[nan_mask.values].values) 
Example 4
Project: recordlinkage   Author: J535D165   File: utils.py   BSD 3-Clause "New" or "Revised" License
def fillna(series_or_arr, missing_value=0.0):
    """Fill missing values in pandas objects and numpy arrays.

    Arguments
    ---------
    series_or_arr : pandas.Series, numpy.ndarray
        The numpy array or pandas series for which the missing values
        need to be replaced.
    missing_value : float, int, str
        The value to replace the missing value with. Default 0.0.

    Returns
    -------
    pandas.Series, numpy.ndarray
        The numpy array or pandas series with the missing values
        filled.
    """

    if pandas.notnull(missing_value):
        if isinstance(series_or_arr, (numpy.ndarray)):
            series_or_arr[numpy.isnan(series_or_arr)] = missing_value
        else:
            series_or_arr.fillna(missing_value, inplace=True)

    return series_or_arr 
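
A short illustration of the helper above on both of the input types it accepts (illustrative values; assumes fillna is imported as defined):

import numpy
import pandas

fillna(pandas.Series([1.0, numpy.nan, 3.0]))                      # -> 1.0, 0.0, 3.0
fillna(numpy.array([1.0, numpy.nan, 3.0]), missing_value=-1.0)    # -> array([ 1., -1.,  3.])
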
Example 5
Project: QuantStudio   Author: Scorpi000   File: AbnormalReturn.py   GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        Mask = (self._Output["事件记录"][:, 2]<=self.EventPostWindow)
        if np.sum(Mask)>0:
            RowPos, ColPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist(), (self._Output["事件记录"][Mask, 2]+self.EventPreWindow).astype(np.int)
            for i in range(RowPos.shape[0]):
                X = self._Output["市场超额收益率"][RowPos[i], :]
                iMask = pd.notnull(X)
                X = sm.add_constant(X[iMask], prepend=True)
                self._Output["异常协方差"][RowPos[i], iMask, iMask] = (np.eye(X.shape[0])+np.dot(np.dot(X, np.linalg.inv(np.dot(X.T, X))), X.T)) * self._Output["Var"][RowPos[i]]
        Index = pd.MultiIndex.from_arrays(self._Output["事件记录"][:,:2].T, names=["ID", "时点"])
        self._Output["回归估计量"] = pd.DataFrame(self._Output.pop("Alpha"), index=Index, columns=["Apha"])
        self._Output["回归估计量"]["Beta"] = self._Output.pop("Beta")
        self._Output["回归估计量"]["Sigma2"] = self._Output.pop("Var")
        self._Output.pop("市场超额收益率")
        return 0 
Example 6
Project: QuantStudio   Author: Scorpi000   File: IC.py   GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        CalcDateTimes = self._Output.pop("时点")
        self._Output["股票数"] = pd.DataFrame(self._Output["股票数"], index=CalcDateTimes)
        self._Output["IC"] = pd.DataFrame(self._Output["IC"], index=CalcDateTimes)
        for i, iFactorName in enumerate(self.TestFactors):
            if self.FactorOrder[iFactorName]=="升序": self._Output["IC"][iFactorName] = -self._Output["IC"][iFactorName]
        self._Output["IC的移动平均"] = self._Output["IC"].copy()
        for i in range(len(CalcDateTimes)):
            if i<self.RollAvgPeriod-1: self._Output["IC的移动平均"].iloc[i,:] = np.nan
            else: self._Output["IC的移动平均"].iloc[i,:] = self._Output["IC"].iloc[i-self.RollAvgPeriod+1:i+1, :].mean()
        self._Output["统计数据"] = pd.DataFrame(index=self._Output["IC"].columns)
        self._Output["统计数据"]["平均值"] = self._Output["IC"].mean()
        self._Output["统计数据"]["标准差"] = self._Output["IC"].std()
        self._Output["统计数据"]["最小值"] = self._Output["IC"].min()
        self._Output["统计数据"]["最大值"] = self._Output["IC"].max()
        self._Output["统计数据"]["IC_IR"] = self._Output["统计数据"]["平均值"] / self._Output["统计数据"]["标准差"]
        self._Output["统计数据"]["t统计量"] = np.nan
        self._Output["统计数据"]["平均股票数"] = self._Output["股票数"].mean()
        self._Output["统计数据"]["IC×Sqrt(N)"] = self._Output["统计数据"]["平均值"]*np.sqrt(self._Output["统计数据"]["平均股票数"])
        self._Output["统计数据"]["有效期数"] = 0.0
        for iFactor in self._Output["IC"]: self._Output["统计数据"].loc[iFactor,"有效期数"] = pd.notnull(self._Output["IC"][iFactor]).sum()
        self._Output["统计数据"]["t统计量"] = (self._Output["统计数据"]["有效期数"]**0.5)*self._Output["统计数据"]["IC_IR"]
        return 0 
Example 7
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def maskCategary(data_len,cat_data=None, mask=None):
    if mask is None:
        mask = (np.zeros((data_len,))==0)
    if cat_data is not None:
        cat_data[pd.isnull(cat_data)] = np.nan
        if cat_data.ndim==1:
            cat_data = cat_data.reshape((cat_data.shape[0],1))
        AllCats = [list(pd.unique(cat_data[mask,i])) for i in range(cat_data.shape[1])]
        AllCats = CartesianProduct(AllCats)
    else:
        AllCats = [(np.nan,)]
        cat_data = np.empty((data_len,1),dtype='float')+np.nan
    CatMask = {}
    for i,iCat in enumerate(AllCats):
        iMask = mask
        for j,jSubCat in enumerate(iCat):
            if pd.notnull(jSubCat):
                iMask = (iMask & (cat_data[:,j]==jSubCat))
            else:
                iMask = (iMask & pd.isnull(cat_data[:,j]))
        CatMask[tuple(iCat)] = iMask
    return CatMask
# Prepare the data for regression
Example 8
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def neutralize(Y, X, cov_matrix, mask=None, constant=False, dummy_data=None, drop_dummy_na=False, other_handle='填充None'):
    StdData = np.empty(Y.shape,dtype='float')+np.nan
    if mask is None:
        mask = pd.isnull(StdData)
    cov_matrix = cov_matrix[mask,:][:,mask]
    NotNAMask,_,YY,XX = prepareRegressData(Y[mask], (X[mask] if X is not None else X), has_constant=constant, dummy_data=(dummy_data[mask] if dummy_data is not None else dummy_data),drop_dummy_na=drop_dummy_na)
    Mask = (np.sum(pd.notnull(cov_matrix),axis=1)>0)
    Mask = ((np.sum(pd.isnull(cov_matrix[:,Mask]),axis=1)==0) & Mask & NotNAMask)
    cov_matrix = cov_matrix[Mask,:][:,Mask]
    YY = YY[Mask[NotNAMask]]
    XX = XX[Mask[NotNAMask]]
    if XX.ndim==1:
        XX = np.reshape(XX,(XX.shape[0],1))
    Temp = StdData[mask]
    Temp[Mask] = YY - np.dot(np.dot(np.dot(np.dot(XX,np.linalg.inv(np.dot(np.dot(XX.T,cov_matrix),XX))),XX.T),cov_matrix),YY)
    StdData[mask] = Temp
    if other_handle=="保持不变":
        StdData[~mask] = Y[~mask]
    return StdData
# Merge factor data
# data: data to be merged, [array,...] or array; method: merge method, options: 直接合成 (direct), 归一合成 (normalized); nan_handle: missing-value handling, options: 剩余合成 (use remaining values), 填充None (fill with None)
Example 9
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def merge(data, mask=None, weight=None, method='直接合成', nan_handle='剩余合成'):
    if not isinstance(data,np.ndarray):
        data = np.array(list(zip(*data)))
    elif data.ndim==1:
        data = np.reshape(data,(data.shape[0],1))
    if mask is None:
        mask = (np.zeros(data.shape[0])==0)
    if weight is None:
        weight = np.ones(data.shape[1])/data.shape[1]
    else:
        weight = np.array(weight)
    if method=='归一合成':
        weight = weight/np.sum(np.abs(weight))
    if nan_handle=='填充None':
        StdData = np.sum(data*weight,axis=1)
    elif nan_handle=='剩余合成':
        StdData = np.nansum(data*weight,axis=1)
        if method=="归一合成":
            TotalWeight = np.sum(pd.notnull(data)*np.abs(weight),axis=1)
            TotalWeight[TotalWeight==0] = np.nan
            StdData = StdData/TotalWeight
    StdData[~mask] = np.nan
    return StdData 
Example 10
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py   GNU General Public License v3.0
def genPortfolioByFiltration(factor_data, ascending=False, target_num=20, target_quantile=0.1, weight=None):
    factor_data = factor_data[pd.notnull(factor_data)]
    factor_data = factor_data.sort_values(inplace=False,ascending=ascending)
    if target_num is not None:
        TargetIDs = set(factor_data.iloc[:target_num].index)
    else:
        TargetIDs = set(factor_data.index)
    if target_quantile is not None:
        if ascending:
            TargetIDs = set(factor_data[factor_data<=factor_data.quantile(target_quantile)].index).intersection(TargetIDs)
        else:
            TargetIDs = set(factor_data[factor_data<=factor_data.quantile(target_quantile)].index).intersection(TargetIDs)
    TargetIDs = list(TargetIDs)
    TargetIDs.sort()
    Portfolio = weight[TargetIDs]
    Portfolio = Portfolio[pd.notnull(Portfolio) & (Portfolio!=0)]
    return Portfolio/Portfolio.sum()
# Generate the price series of a continuous futures contract
# id_map: the monthly contract ID for each period of the continuous contract, Series(ID)
# price: price series of the monthly contracts, DataFrame(price, index=id_map.index, columns=[monthly contract ID])
# adj_direction: adjustment direction, options: "前复权" (last-period price unchanged), "后复权" (first-period price unchanged)
# adj_type: adjustment method, options: "收益率不变" (returns unchanged), "价差不变" (spreads unchanged), "价格不变" (prices unchanged)
# rollover_ahead: whether the contract roll happens one period early, bool
# Returns: Series(price, index=id_map.index)
Example 11
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py   GNU General Public License v3.0
def plotCDF(self):# Empirical distribution plot
        SelectedColumn = self.getSelectedColumns()
        if len(SelectedColumn)!=1: return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        SelectedDF,Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        SelectedDF = SelectedDF.iloc[:,0]
        xData = SelectedDF[pd.notnull(SelectedDF)].values
        xData.sort()
        nData = xData.shape[0]
        Delta = (xData[-1]-xData[0])/nData
        xData = np.append(xData[0]-Delta,xData)
        xData = np.append(xData,xData[-1]+Delta)
        yData = (np.linspace(0,nData+1,nData+2))/(nData)
        yData[-1] = yData[-2]
        GraphObj = [plotly.graph_objs.Scatter(x=xData,y=yData,name="经验分布函数")]
        xNormalData = np.linspace(xData[0],xData[-1],(nData+2)*10)
        yNormalData = stats.norm.cdf(xNormalData,loc=np.mean(xData[1:-1]),scale=np.std(xData[1:-1]))
        GraphObj.append(plotly.graph_objs.Scatter(x=xNormalData,y=yNormalData,name="Normal Distribution"))
        with tempfile.TemporaryFile() as File:
            plotly.offline.plot({"data":GraphObj,"layout": plotly.graph_objs.Layout(title="经验分布")}, filename=File.name)
        return 0 
Example 12
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py   GNU General Public License v3.0
def calStatistics(self):# Summary statistics
        SelectedDF, Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        # Set the index over which the statistics are computed
        SelectedIndex = self._getDataIndex(list(SelectedDF.index))
        SummaryData = pd.DataFrame(index=['数量','均值','中位数','方差','标准差','最大值','最小值','总和','总积'],columns=[str(iCol) for iCol in SelectedDF.columns])
        for i,iCol in enumerate(SelectedDF.columns):
            iData = SelectedDF.iloc[:,i].loc[SelectedIndex]
            SummaryData.loc['总和'].iloc[i] = iData.sum()
            SummaryData.loc['数量'].iloc[i] = iData[pd.notnull(iData)].shape[0]
            SummaryData.loc['均值'].iloc[i] = iData.mean()
            SummaryData.loc['方差'].iloc[i] = iData.var()
            SummaryData.loc['标准差'].iloc[i] = iData.std()
            SummaryData.loc['中位数'].iloc[i] = iData.median()
            SummaryData.loc['总积'].iloc[i] = iData.prod()
            SummaryData.loc['最大值'].iloc[i] = iData.max()
            SummaryData.loc['最小值'].iloc[i] = iData.min()
        TableWidget = QtWidgets.QTableWidget()
        populateTableWithDataFrame(TableWidget, SummaryData)
        _TableDlg(None, TableWidget).exec_()
        return 0 
Example 13
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py   GNU General Public License v3.0
def plotHist(self):
        SelectedColumn = self.getSelectedColumns()
        if len(SelectedColumn)!=1: return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        SelectedDF, Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        SelectedDF = SelectedDF.iloc[:,0]
        GroupNum,isOK = QtWidgets.QInputDialog.getInt(self, "获取分组数", "分组数", value=10, min=1, max=1000, step=1)
        if not isOK: return 0
        tempFigDlg = _MatplotlibWidget()
        Fig = tempFigDlg.Mpl.Fig
        Axes = Fig.add_subplot(111)
        yData = SelectedDF[pd.notnull(SelectedDF)].values
        xData = np.linspace(np.min(yData),np.max(yData),len(yData)*10)
        yNormalData = stats.norm.pdf(xData,loc=np.mean(yData),scale=np.std(yData))
        Axes.hist(yData, GroupNum, density=True, label='直方图', color="b")
        Axes.plot(xData, yNormalData, label='Normal Distribution', linewidth=2, color='r')
        Axes.legend(loc='upper left', shadow=True)
        tempFigDlg.Mpl.draw()
        tempFigDlg.show()
        return 0 
Example 14
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py   GNU General Public License v3.0
def plotCDF(self):
        SelectedColumn = self.getSelectedColumns()
        if len(SelectedColumn)!=1: return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        SelectedDF, Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        SelectedDF = SelectedDF.iloc[:,0]
        tempFigDlg = _MatplotlibWidget()
        Fig = tempFigDlg.Mpl.Fig
        Axes = Fig.add_subplot(111)
        xData = SelectedDF[pd.notnull(SelectedDF)].values
        xData.sort()
        nData = len(xData)
        Delta = (xData[-1]-xData[0])/nData
        xData = np.append(xData[0]-Delta,xData)
        xData = np.append(xData,xData[-1]+Delta)
        yData = (np.linspace(0,nData+1,nData+2))/(nData)
        yData[-1] = yData[-2]
        Axes.plot(xData,yData,label='经验分布函数',linewidth=2,color='b')
        xNormalData = np.linspace(xData[0],xData[-1],(nData+2)*10)
        yNormalData = stats.norm.cdf(xNormalData,loc=np.mean(xData[1:-1]),scale=np.std(xData[1:-1]))
        Axes.plot(xNormalData, yNormalData, label='Normal Distribution', linewidth=2, color='r')
        Axes.legend(loc='upper left',shadow=True)
        tempFigDlg.Mpl.draw()
        tempFigDlg.show()
        return 0 
Example 15
Project: QuantStudio   Author: Scorpi000   File: AuxiliaryFun.py   GNU General Public License v3.0
def getClassMask(subclass,class_data):
    if isinstance(class_data, np.ndarray):
        Mask = np.array([True]*class_data.shape[0])
    else:
        Mask = pd.Series(True,index=class_data.index)
    if subclass is None:
        return Mask
    if isinstance(class_data, np.ndarray):
        for j,jSubClass in enumerate(subclass):
            if pd.notnull(jSubClass):
                Mask = Mask & (class_data[:,j]==jSubClass)
            else:
                Mask = Mask & pd.isnull(class_data[:,j])
    else:
        for j,jSubClass in enumerate(subclass):
            if pd.notnull(jSubClass):
                Mask = Mask & (class_data.iloc[:,j]==jSubClass)
            else:
                Mask = Mask & pd.isnull(class_data.iloc[:,j])
    return Mask

# Align two Series so that their indexes match; missing entries are filled with a specified value
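
A hypothetical illustration of getClassMask above on a small ndarray (assumes the function is defined as shown); a None entry in subclass matches missing values in the corresponding column:

import numpy as np
import pandas as pd

class_data = np.array([["A", "X"], ["A", None], ["B", "X"]], dtype=object)
getClassMask(("A", None), class_data)   # -> array([False,  True, False])
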
Example 16
Project: QuantStudio   Author: Scorpi000   File: DataTypeConversionFun.py   GNU General Public License v3.0
def DummyVarTo01Var(dummy_var,ignore_na=False,ignores=[],ignore_nonstring=False):
    if dummy_var.shape[0]==0:
        return pd.DataFrame()
    NAMask = pd.isnull(dummy_var)
    if ignore_na:
        AllClasses = dummy_var[~NAMask].unique()
    else:
        dummy_var[NAMask] = np.nan
        AllClasses = dummy_var.unique()
    AllClasses = [iClass for iClass in AllClasses if (iClass not in ignores) and ((not ignore_nonstring) or isinstance(iClass,str) or pd.isnull(iClass))]
    OZVar = pd.DataFrame(0.0,index=dummy_var.index,columns=AllClasses,dtype='float')
    for iClass in AllClasses:
        if pd.notnull(iClass):
            iMask = (dummy_var==iClass)
        else:
            iMask = NAMask
        OZVar[iClass][iMask] = 1.0
    return OZVar
# Convert a DataFrame into a Series with a two-level index built from the DataFrame's index and columns.
Example 17
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py   GNU General Public License v3.0
def calcBlendingCoefficient(specific_ret):
    Gamma = {}
    for iID in specific_ret.columns:
        iSpecificRet = specific_ret[iID]
        iSpecificRet = iSpecificRet[pd.notnull(iSpecificRet)].values
        ih = iSpecificRet.shape[0]
        if ih==0:
            Gamma[iID]=0
            continue
        iRobustStd = 1/1.35*(np.percentile(iSpecificRet,75)-np.percentile(iSpecificRet,25))
        iSpecificRet[iSpecificRet>10*iRobustStd] = 10*iRobustStd
        iSpecificRet[iSpecificRet<-10*iRobustStd] = -10*iRobustStd
        iStd = np.std(iSpecificRet)
        iZVal = np.abs((iStd-iRobustStd)/iRobustStd)
        Gamma[iID] = min((1,max((0,(ih-60)/120))))*min((1,max((0,np.exp(1-iZVal)))))
    Gamma = pd.Series(Gamma,name='Gamma')
    Gamma[pd.isnull(Gamma)] = 0
    return Gamma
    
# Compute the structural forecast of specific risk
Example 18
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py   GNU General Public License v3.0
def calcSTRSpecificRisk(gamma, std_ts, factor_data, cap):
    # Prepare the regression data
    IDs = gamma[gamma==1].index.tolist()# Select IDs whose gamma equals 1
    Y = std_ts.loc[IDs]
    Y[Y==0] = np.nan
    FactorData = factor_data.loc[IDs, :]
    FactorData = FactorData.loc[:, FactorData.abs().sum()!=0]
    RegWeight = calcRegressWeight(cap).loc[IDs]
    # Regression
    Coef = regressWithOneLinearEqConstraint(np.log(Y.values), FactorData.values, RegWeight.values)
    # Estimate the scale multiplier
    Temp = Y.values / np.exp(np.dot(FactorData.values, Coef))
    Mask = (pd.notnull(Temp) & pd.notnull(RegWeight.values))
    E0 = np.nansum(Temp[Mask] * RegWeight.values[Mask]) / np.nansum(RegWeight.values[Mask])
    # Compute the structural forecast of specific risk
    return pd.Series(np.exp(np.dot(factor_data.loc[:, FactorData.columns].values, Coef)) * E0, index=std_ts.index)

# Estimate specific risk using the Barra EUE3 method; see EUE3
# specific_ret: DataFrame(returns, index=[date], columns=[ID]); forcast_num: number of periods to forecast ahead;
# auto_corr_num: maximum lag considered for autocorrelation; half_life: half-life of the exponential time weights;
Example 19
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py   GNU General Public License v3.0
def BayesianShrinkage(specific_risk, cap,quantile_num=10, q=0.1):
    Rslt = pd.Series(np.nan,index=specific_risk.index)
    Mask = pd.notnull(specific_risk)
    specific_risk = specific_risk[Mask]
    cap = cap[Mask]
    for i in range(quantile_num):
        if i==0:
            iIDs = cap[cap<cap.quantile((i+1)/quantile_num)].index
        elif i==quantile_num-1:
            iIDs = cap[cap>=cap.quantile(i/quantile_num)].index
        else:
            iIDs = cap[(cap>=cap.quantile(i/quantile_num)) & (cap<cap.quantile((i+1)/quantile_num))].index
        iSpecificRisk = specific_risk[iIDs]
        iStd = (iSpecificRisk*cap[iIDs]).sum()/cap[iIDs].sum()
        iDelta = ((iSpecificRisk-iStd)**2).sum()/iSpecificRisk.shape[0]
        iv = q*(iSpecificRisk-iStd).abs()/(q*(iSpecificRisk-iStd).abs()+iDelta)
        Rslt[iIDs] = iSpecificRisk*iv+(1-iv)*iStd
    return Rslt

# Volatility Regime Adjustment
# ret: DataFrame(returns, index=[date (daily frequency)], columns=[ID or factor]);
# forcast_volitility: DataFrame(volatility forecasts, index=[forecast date], columns=[ID or factor]);
# half_life: half-life used to compute the multiplier; forcast_num: number of forecast periods; if <=0, returns are computed over the date spacing of forcast_volitility
# Returns the adjustment multiplier
Example 20
Project: QuantStudio   Author: Scorpi000   File: JYDB.py   GNU General Public License v3.0
def __init__(self, name, fdb, sys_args={}, **kwargs):
        self._DBTableName = fdb.TablePrefix + fdb._TableInfo.loc[name, "DBTableName"]
        self._FactorInfo = fdb._FactorInfo.loc[name]
        self._IDField = self._FactorInfo["DBFieldName"][self._FactorInfo["FieldType"]=="ID"].iloc[0]# ID field
        self._IDFieldIsStr = (_identifyDataType(self._FactorInfo["DataType"][self._FactorInfo["FieldType"]=="ID"].iloc[0])!="double")
        self._ConditionFields = self._FactorInfo[self._FactorInfo["FieldType"]=="Condition"].index.tolist()# List of all condition fields
        self._MainTableName = fdb._TableInfo.loc[name, "MainTableName"]
        if pd.isnull(self._MainTableName):
            self._MainTableName = self._DBTableName
            self._MainTableID = self._IDField
            self._MainTableCondition = None
        else:
            self._MainTableName = fdb.TablePrefix + self._MainTableName
            self._MainTableID = fdb._TableInfo.loc[name, "MainTableID"]
            self._JoinCondition = fdb._TableInfo.loc[name, "JoinCondition"].format(DBTable=self._DBTableName, MainTable=self._MainTableName)
            self._MainTableCondition = fdb._TableInfo.loc[name, "MainTableCondition"]
            if pd.notnull(self._MainTableCondition):
                self._MainTableCondition = self._MainTableCondition.format(MainTable=self._MainTableName)
            self._IDFieldIsStr = True
        self._SecurityType = fdb._TableInfo.loc[name, "SecurityType"]
        return super().__init__(name=name, fdb=fdb, sys_args=sys_args, **kwargs) 
Example 21
Project: QuantStudio   Author: Scorpi000   File: JYDB.py   GNU General Public License v3.0
def __QS_prepareRawData__(self, factor_names, ids, dts, args={}):
        # Build the SQL statement: ID, factor data
        SQLStr = "SELECT "+self._getIDField()+" AS ID, "
        FieldSQLStr, SETableJoinStr = self._genFieldSQLStr(factor_names)
        SQLStr += FieldSQLStr+" "
        SQLStr += self._genFromSQLStr(setable_join_str=SETableJoinStr)+" "
        SQLStr += "WHERE ("+genSQLInCondition(self._MainTableName+"."+self._MainTableID, deSuffixID(ids), is_str=self._IDFieldIsStr, max_num=1000)+") "
        if pd.notnull(self._MainTableCondition): SQLStr += "AND "+self._MainTableCondition+" "
        SQLStr += self._genConditionSQLStr(args=args)+" "
        SQLStr += "ORDER BY ID"
        RawData = self._FactorDB.fetchall(SQLStr)
        if not RawData: return pd.DataFrame(columns=["ID"]+factor_names)
        RawData = pd.DataFrame(np.array(RawData, dtype="O"), columns=["ID"]+factor_names)
        RawData = self._adjustRawDataByRelatedField(RawData, factor_names)
        RawData["ID"] = [str(iID) for iID in RawData["ID"]]
        return RawData 
Example 22
Project: QuantStudio   Author: Scorpi000   File: JYDB.py   GNU General Public License v3.0
def getID(self, ifactor_name=None, idt=None, args={}):
        SQLStr = "SELECT DISTINCT "+self._getIDField()+" AS ID "
        SQLStr += self._genFromSQLStr()+" "
        if idt is not None:
            SQLStr += "WHERE "+self._DBTableName+"."+self._StartDateField+"<='"+idt.strftime("%Y-%m-%d")+"' "
            if self._EndDateIncluded:
                SQLStr += "AND "+self._DBTableName+"."+self._EndDateField+">='"+idt.strftime("%Y-%m-%d")+"' "
            else:
                SQLStr += "AND "+self._DBTableName+"."+self._EndDateField+">'"+idt.strftime("%Y-%m-%d")+"' "        
        else: SQLStr += "WHERE "+self._DBTableName+"."+self._StartDateField+" IS NOT NULL "
        SQLStr += "AND "+self._DBTableName+"."+self._IDField+" IS NOT NULL "
        if pd.notnull(self._MainTableCondition): SQLStr += "AND "+self._MainTableCondition+" "
        SQLStr += self._genConditionSQLStr(args=args)+" "
        SQLStr += "ORDER BY ID"
        return [iRslt[0] for iRslt in self._FactorDB.fetchall(SQLStr)]
    # Return the sequence of datetimes from the start date of the given ID iid up to now
    # If idt is None, the earliest start date in the table is used as the starting point
    # ifactor_name is ignored
Example 23
Project: gullikson-scripts   Author: kgullikson88   File: Sensitivity.py   MIT License
def split_by_component(df):
    df['prim_comp'] = df.Comp.map(lambda s: s[0])
    df['sec_comp'] = df.Comp.map(lambda s: s[-1])
    comps = pd.concat((df[['prim_comp', 'Sp1']], df[['sec_comp', 'Sp2']]))
    prim = comps.loc[comps.prim_comp.notnull()].rename(columns={'Sp1': 'SpT', 'prim_comp': 'comp'})
    sec = comps.loc[comps.sec_comp.notnull()].rename(columns={'Sp2': 'SpT', 'sec_comp': 'comp'})
    return pd.concat((prim, sec))[['comp', 'SpT']].drop_duplicates(subset='comp') 
Example 24
Project: techa   Author: havocesp   File: overlap.py   The Unlicense
def KAMA(data, period=20, er_period=10, fast=2, slow=30, price='close'):
    """
    Kaufman's Adaptive Moving Average

    KAMA is a moving average designed to account for market noise or volatility.

    Its main advantage is that it takes into consideration not just the direction, but the market volatility as
    well.

    :param pd.DataFrame data: pandas DataFrame with open, high, low, close data
    :param int period: period used for indicator calculation
    :param int er_period: period used for indicator calculation
    :param int fast: fast period used for indicator calculation
    :param int slow: slow period used for indicator calculation
    :param str price: column used for indicator calculation (default = "close")
    :return pd.Series: with indicator data calculation results
    """
    er = ER(data, er_period)
    fast_alpha = 2 / (fast + 1)
    slow_alpha = 2 / (slow + 1)

    # smoothing constant
    # noinspection PyTypeChecker
    sc = pd.Series((er * (fast_alpha - slow_alpha) + slow_alpha) ** 2)
    sma_ = SMA(data, period, price)

    kama_ = []

    for smooth, ma, price in zip(sc, sma_.shift(-1), data[price]):
        try:
            kama_.append(kama_[-1] + smooth * (price - kama_[-1]))
        except (IndexError, TypeError):
            if pd.notnull(ma):
                kama_.append(ma + smooth * (price - ma))
            else:
                kama_.append(None)

    return pd.Series(kama_, index=sma_.index, name='KAMA') 
Example 25
Project: recordlinkage   Author: J535D165   File: base.py   BSD 3-Clause "New" or "Revised" License
def _make_index_names(self, name1, name2):

        if pandas.notnull(name1) and pandas.notnull(name2) and \
                (name1 == name2):
            return ["{}{}".format(name1, self.suffixes[0]),
                    "{}{}".format(name1, self.suffixes[1])]
        else:
            return [name1, name2] 
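
The guard above works because pandas.notnull() also accepts scalars; a tiny illustration with made-up names:

import pandas
pandas.notnull("rec_id")   # True
pandas.notnull(None)       # False
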
Example 26
Project: SA-LSTM   Author: hobincar   File: MSVD.py   MIT License
def load_captions(self):
        df = pd.read_csv(self.caption_fpath)
        df = df[df['Language'] == 'English']
        df = df[pd.notnull(df['Description'])]
        captions = df['Description'].values
        return captions 
Example 27
Project: SA-LSTM   Author: hobincar   File: MSVD.py   MIT License
def load_captions(self):
        df = pd.read_csv(self.caption_fpath)
        df = df[df['Language'] == 'English']
        df = df[[ 'VideoID', 'Start', 'End', 'Description' ]]
        df = df[pd.notnull(df['Description'])]

        for video_id, start, end, caption in df.values:
            vid = "{}_{}_{}".format(video_id, start, end)
            self.captions[vid].append(caption) 
Example 28
Project: SA-LSTM   Author: hobincar   File: MSVD.py   MIT License
def load_metadata():
    df = pd.read_csv(C.caption_fpath)
    df = df[df['Language'] == 'English']
    df = df[pd.notnull(df['Description'])]
    df = df.reset_index(drop=True)
    return df 
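
The df[pd.notnull(df['Description'])] pattern used by these MSVD loaders keeps only the rows whose Description is present. A self-contained sketch with made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame({'Language': ['English', 'English'],
                   'Description': ['a man plays guitar', np.nan]})
df = df[pd.notnull(df['Description'])]   # the second row is dropped
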
Example 29
Project: grafana-csv-datasource   Author: SmartBlug   File: PythonServer.py   MIT License
def dataframe_to_json_table(target, df):
    response = []

    if df.empty:
        return response

    if isinstance(df, pd.DataFrame):
        response.append({'type': 'table',
                         'columns': df.columns.map(lambda col: {"text": col}).tolist(),
                         'rows': df.where(pd.notnull(df), None).values.tolist()})
    else:
        abort(404, Exception('Received object is not a dataframe.'))

    return response 
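
The df.where(pd.notnull(df), None) call above replaces missing cells with None so the table serializes to JSON without NaN values. A minimal sketch of that idiom (exact dtype handling may vary slightly across pandas versions):

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, np.nan], 'b': ['x', 'y']})
df.where(pd.notnull(df), None).values.tolist()   # [[1.0, 'x'], [None, 'y']]
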
Example 30
Project: msdas   Author: cokelaer   File: replicates.py   GNU General Public License v3.0
def set_irrelevant_replicates_to_na(self):
        """Set unique replicate to NAs

        If an experiment has no replicates (0 or 1), you may want to set the
        experiment to NA. This is not relevant if there are zero replicates
        since the value may already be an NA but may make sense when there
        is only one replicate, for which no errors can be obtained.


        .. plot::
            :include-source:
            :width: 70%

            from msdas import *
            r = ReplicatesYeast(get_yeast_raw_data(), verbose=False)
            r.set_irrelevant_replicates_to_na()
            r.hist_na_per_experiments(color="r", alpha=0.5)
            r.reset()
            r.hist_na_per_experiments(color="g", alpha=0.5)

        """
        tags = self.get_unique_measurement_name()
        for tag in tags:
            df = self.get_replicates_from_one_unique_measurement(tag)
            indices = pd.notnull(df).sum(axis=1)<=1
            indices = [k for k,v in indices.iteritems() if v]
            colnames = [c for c in self.df.columns if c.split(".")[0]==tag]
            self.df.ix[indices, colnames] = np.nan 
Example 31
Project: msdas   Author: cokelaer   File: replicates.py   GNU General Public License v3.0
def _get_na_count_per_experiment(self):
        nas = {}
        for tag in self.get_unique_measurement_name():
           df = self.get_replicates_from_one_unique_measurement(tag)
           R = len(df.columns)
           na = R  - pd.notnull(df).sum(axis=1)
           nas[tag] = na.copy()
        return nas 
Example 32
Project: msdas   Author: cokelaer   File: readers.py   GNU General Public License v3.0
def get_na_count(self):
        """Return vector with number of NA per row (psite/protein)"""
        return len(self.df.columns) - pd.notnull(self.df).sum(axis=1) 
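
Because pd.notnull(df) is an element-wise Boolean frame, summing it along axis=1 counts the non-missing cells per row, so the subtraction above gives the NA count. An illustrative sketch with made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': [1.0, np.nan], 'y': [np.nan, np.nan]})
len(df.columns) - pd.notnull(df).sum(axis=1)   # row 0 -> 1, row 1 -> 2
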
Example 33
Project: QuantStudio   Author: Scorpi000   File: ReturnBasedModel.py   GNU General Public License v3.0
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        TargetNAV = self._TargetTable.readData(dts=[idt], ids=self._Output["目标ID"], factor_names=[self.TargetNAV]).iloc[0, :, :].values
        self._Output["目标净值"] = np.r_[self._Output["目标净值"], TargetNAV]
        StyleNAV = self._StyleTable.readData(dts=[idt], ids=self._Output["风格ID"], factor_names=[self.StyleNAV]).iloc[0, :, :].values
        self._Output["风格指数净值"] = np.r_[self._Output["风格指数净值"], StyleNAV]
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
        if self._Output["目标净值"].shape[0]-1<self.MinSummaryWindow: return 0
        StartInd = int(max(0, self._Output["目标净值"].shape[0] - 1 - self.SummaryWindow))
        X = _calcReturn(self._Output["风格指数净值"][StartInd:, :], return_type=self.ReturnType)
        Y = _calcReturn(self._Output["目标净值"][StartInd:, :], return_type=self.ReturnType)
        nTargetID, nStyleID = len(self._Output["目标ID"]), len(self._Output["风格ID"])
        Rsquared = np.full((nTargetID, ), np.nan)
        for i, iID in enumerate(self._Output["目标ID"]):
            iMask = ((np.sum(pd.isnull(X), axis=1)==0) & (pd.notnull(Y[:, i])))
            try:
                iBeta = regressByCVX(Y[:, i], X, weight=None, constraints={"Box": {"ub": np.ones((nStyleID, )), "lb": np.zeros((nStyleID, ))},
                                                                                                                      "LinearEq": {"Aeq": np.ones((1, nStyleID)), "beq": 1}})
            except:
                iBeta = None
            if iBeta is None:
                self._Output["滚动回归系数"][iID].append(np.full((nStyleID, ), np.nan))
            else:
                self._Output["滚动回归系数"][iID].append(iBeta)
                Rsquared[i] = 1 - np.nansum((Y[:, i][iMask] - np.dot(X[iMask], iBeta))**2) / np.nansum((Y[:, i][iMask] - np.nanmean(Y[:, i][iMask]))**2)
        self._Output["滚动回归R平方"].append(Rsquared)
        self._Output["时点"].append(idt)
        return 0 
Example 34
Project: QuantStudio   Author: Scorpi000   File: ReturnBasedModel.py   GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        DTs, StyleIDs, TargetIDs = self._Output.pop("时点"), self._Output.pop("风格ID"), self._Output.pop("目标ID")
        nTargetID, nStyleID = len(TargetIDs), len(StyleIDs)
        X = _calcReturn(self._Output["风格指数净值"], return_type=self.ReturnType)
        Y = _calcReturn(self._Output["目标净值"], return_type=self.ReturnType)
        self._Output["全样本回归系数"] = np.full(shape=(nStyleID, nTargetID), fill_value=np.nan)
        self._Output["全样本回归R平方"] = np.full(shape=(nTargetID, ), fill_value=np.nan)
        for i, iID in enumerate(TargetIDs):
            iMask = ((np.sum(pd.isnull(X), axis=1)==0) & (pd.notnull(Y[:, i])))
            try:
                iBeta = regressByCVX(Y[:, i], X, weight=None, constraints={"Box": {"ub": np.ones((nStyleID, )), "lb": np.zeros((nStyleID, ))},
                                                                                                                      "LinearEq": {"Aeq": np.ones((1, nStyleID)), "beq": 1}})
            except:
                iBeta = None
            if iBeta is not None:
                self._Output["全样本回归系数"][:, i] = iBeta
                self._Output["全样本回归R平方"][i] = 1 - np.nansum((Y[:, i][iMask] - np.dot(X[iMask], iBeta))**2) / np.nansum((Y[:, i][iMask] - np.nanmean(Y[:, i][iMask]))**2)
            self._Output["滚动回归系数"][iID] = pd.DataFrame(self._Output["滚动回归系数"][iID], index=DTs, columns=self.StyleIDs)
        self._Output["全样本回归系数"] = pd.DataFrame(self._Output["全样本回归系数"], index=StyleIDs, columns=TargetIDs)
        self._Output["全样本回归R平方"] = pd.DataFrame(self._Output["全样本回归R平方"], index=TargetIDs, columns=["全样本回归R平方"])
        self._Output["滚动回归R平方"] = pd.DataFrame(self._Output["滚动回归R平方"], index=DTs, columns=TargetIDs)
        self._Output["目标净值"] = pd.DataFrame(self._Output["目标净值"], index=self._Model.DateTimeSeries, columns=self.TargetIDs)
        self._Output["风格指数净值"] = pd.DataFrame(self._Output["风格指数净值"], index=self._Model.DateTimeSeries, columns=self.StyleIDs)
        return 0 
Example 35
Project: QuantStudio   Author: Scorpi000   File: BrinsonModel.py   GNU General Public License v3.0
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        PreDT = None
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
            if self._CurCalcInd>0: PreDT = self.CalcDTs[self._CurCalcInd - 1]
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
            if self._CurCalcInd>0: PreDT = self._Model.DateTimeSeries[self._CurCalcInd - 1]
        if PreDT is None: return 0
        Portfolio = self._FactorTable.readData(factor_names=[self.Portfolio, self.BenchmarkPortfolio], dts=[PreDT], ids=self._IDs).iloc[:, 0, :]
        BenchmarkPortfolio, Portfolio = Portfolio.iloc[:, 1], Portfolio.iloc[:, 0]
        Portfolio[pd.isnull(Portfolio)], BenchmarkPortfolio[pd.isnull(BenchmarkPortfolio)] = 0.0, 0.0
        Price = self._FactorTable.readData(factor_names=[self.PriceFactor], dts=[PreDT, idt], ids=self._IDs).iloc[0]
        Return = Price.iloc[1] / Price.iloc[0] - 1
        Return[pd.isnull(Return)] = 0.0
        GroupData = self._FactorTable.readData(factor_names=[self.GroupFactor], ids=self._IDs, dts=[PreDT]).iloc[0, 0, :]
        AllGroups = pd.unique(GroupData[pd.notnull(GroupData)].values).tolist()
        if GroupData.hasnans: AllGroups.append(None)
        for iGroup in AllGroups:
            if iGroup is None: iMask = pd.isnull(GroupData)
            else: iMask = (GroupData==iGroup)
            iGroup = str(iGroup)
            iPortfolio, iBenchmarkPortfolio = Portfolio[iMask], BenchmarkPortfolio[iMask]
            iGroupWeight, iBenchmarkGroupWeight = iPortfolio.sum(), iBenchmarkPortfolio.sum()
            self._Output["策略组合资产权重"].loc[idt, iGroup] = iGroupWeight
            self._Output["基准组合资产权重"].loc[idt, iGroup] = iBenchmarkGroupWeight
            self._Output["策略组合资产收益"].loc[idt, iGroup] = ((iPortfolio * Return[iMask]).sum() / iGroupWeight if iGroupWeight!=0 else 0.0)
            self._Output["基准组合资产收益"].loc[idt, iGroup] = ((iBenchmarkPortfolio * Return[iMask]).sum() / iBenchmarkGroupWeight if iBenchmarkGroupWeight!=0 else 0.0)
        self._Output["策略组合资产权重"].loc[idt, "现金"] = 1 - self._Output["策略组合资产权重"].loc[idt].iloc[1:].sum()
        self._Output["基准组合资产权重"].loc[idt, "现金"] = 1 - self._Output["基准组合资产权重"].loc[idt].iloc[1:].sum()
        return 0 
Example 36
Project: QuantStudio   Author: Scorpi000   File: PortfolioStrategy.py   GNU General Public License v3.0
def _genSignalIDs(self, idt, original_ids, signal_type):
        IDs = original_ids
        FilterLevel = 0
        for i in range(self.FiltrationLevel):
            iArgs = self["第"+str(i)+"层"]
            if iArgs.SignalType!=signal_type: continue
            if iArgs.IDFilter:
                iIDs = self._FT.getFilteredID(idt, id_filter_str=iArgs.IDFilter)
                IDs = sorted(set(iIDs).intersection(set(IDs)))
            if iArgs.GroupFactors:
                GroupData = self._FT.readData(dts=[idt], ids=IDs, factor_names=list(iArgs.GroupFactors)).iloc[:,0,:]
                if GroupData.shape[0]>0: GroupData[pd.isnull(GroupData)] = np.nan
                AllGroups = [GroupData[iGroup].unique().tolist() for iGroup in iArgs.GroupFactors]
                AllGroups = CartesianProduct(AllGroups)
                IDs = []
                for jGroup in AllGroups:
                    jMask = pd.Series(True, index=GroupData.index)
                    for k, kSubGroup in enumerate(jGroup):
                        if pd.notnull(kSubGroup): jMask = (jMask & (GroupData[iArgs.GroupFactors[k]]==kSubGroup))
                        else: jMask = (jMask & pd.isnull(GroupData[iArgs.GroupFactors[k]]))
                    jIDs = self._filtrateID(idt, GroupData[jMask].index.tolist(), iArgs)
                    IDs += jIDs
            else:
                IDs = self._filtrateID(idt, IDs, iArgs)
            FilterLevel += 1
        if FilterLevel>0: return IDs
        else: return [] 
Example 37
Project: QuantStudio   Author: Scorpi000   File: DefaultAccount.py   GNU General Public License v3.0
def order(self, target_id=None, num=0, target_price=np.nan, combined_order=None):
        if target_id is not None:
            self._Orders.loc[self._Orders.shape[0]] = (target_id, num, target_price)
            if pd.notnull(target_price): self._QS_Logger.warning("账户: '%s' 不支持限价单, 限价单将自动转为市价单!" % self.Name)
            return (self._Orders.shape[0], target_id, num, target_price)
        if combined_order is not None:
            if pd.notnull(combined_order["目标价"]).sum()>0: self._QS_Logger.warning("本账户: '%s' 不支持限价单, 限价单将自动转为市价单!" % self.Name)
            combined_order.index.name = "ID"
            combined_order = combined_order.reset_index()
            combined_order.index = np.arange(self._Orders.shape[0], self._Orders.shape[0]+combined_order.shape[0])
            self._Orders = self._Orders.append(combined_order)
        return combined_order
    # Cancel orders; order_ids are the indexes of the orders in self.Orders
Example 38
Project: QuantStudio   Author: Scorpi000   File: Spread.py   GNU General Public License v3.0
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        Price = self._FactorTable.readData(dts=[idt], ids=self._IDs, factor_names=[self.PriceFactor]).iloc[0, :, :].values
        if self.PriceType=="对数价格":
            Price = np.log(Price)
            Price[np.isinf(Price)] = np.nan
        self._Output["价格"] = np.r_[self._Output["价格"], Price]
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
        StartInd = int(max(0, self._Output["价格"].shape[0] - self.SummaryWindow))
        if self._Output["价格"].shape[0] - StartInd < self.MinSummaryWindow: return 0
        Price, nID = self._Output["价格"][StartInd:], self._Output["价格"].shape[1]
        IDMask = self._FactorTable.getIDMask(idt=idt, ids=self._IDs, id_filter_str=self.IDFilter).values
        Price = Price[:, IDMask]
        Mask = pd.notnull(Price)
        Statistics, pValue = np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan), np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan)
        for i in range(Price.shape[1]):
            for j in range(i+1, Price.shape[1]):
                ijMask = (Mask[:, i] & Mask[:, j])
                try:
                    iRslt = sm.tsa.stattools.coint(Price[:,i][ijMask], Price[:,j][ijMask], **self.CointArgs)
                    Statistics[i, j] = Statistics[j, i] = iRslt[0]
                    pValue[i, j] = pValue[j, i] = iRslt[1]
                except:
                    pass
        self._Output["统计量"][idt], self._Output["p值"][idt] = pd.DataFrame(index=self._IDs, columns=self._IDs), pd.DataFrame(index=self._IDs, columns=self._IDs)
        self._Output["统计量"][idt].iloc[IDMask, IDMask] = Statistics
        self._Output["p值"][idt].iloc[IDMask, IDMask] = pValue
        return 0 
Example 39
Project: QuantStudio   Author: Scorpi000   File: Spread.py   GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        DTs = sorted(self._Output["统计量"])
        self._Output["最后一期检验"] = {"统计量": self._Output["统计量"][DTs[-1]], "p值": self._Output["p值"][DTs[-1]]}
        Price = self._Output.pop("价格")
        if np.isinf(self.SummaryWindow) and (DTs[-1]==self._iDT) and (not self.IDFilter):
            self._Output["全样本检验"] = deepcopy(self._Output["最后一期检验"])
        else:
            Mask = pd.notnull(Price)
            Statistics, pValue = np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan), np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan)
            for i in range(Price.shape[1]):
                for j in range(i+1, Price.shape[1]):
                    ijMask = (Mask[:, i] & Mask[:, j])
                    try:
                        iRslt = sm.tsa.stattools.coint(Price[:,i][ijMask], Price[:,j][ijMask], **self.CointArgs)
                        Statistics[i, j] = Statistics[j, i] = iRslt[0]
                        pValue[i, j] = pValue[j, i] = iRslt[1]
                    except:
                        pass
            self._Output["全样本检验"] = {"统计量": pd.DataFrame(Statistics, index=self._IDs, columns=self._IDs), "p值": pd.DataFrame(pValue, index=self._IDs, columns=self._IDs)}
        self._Output["滚动检验"] = {"统计量": pd.Panel(self._Output.pop("统计量")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index(),
                                    "p值": pd.Panel(self._Output.pop("p值")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index()}
        Cols = self._Output["滚动检验"]["统计量"].columns.tolist()
        Cols[0], Cols[1] = "时点", "ID"
        self._Output["滚动检验"]["统计量"].columns = self._Output["滚动检验"]["p值"].columns = Cols
        return 0 
Example 40
Project: QuantStudio   Author: Scorpi000   File: AbnormalReturn.py   GNU General Public License v3.0
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        CurInd = self._AllDTs.index(idt)
        if CurInd<=self.EventPreWindow+self.EstWindow: return 0
        self._Output["事件记录"][:, 2] += 1
        IDs = self._FactorTable.getFilteredID(idt=idt, id_filter_str=self.EventFilter)
        nID, EventWindow = len(IDs), self.EventPreWindow+1+self.EventPostWindow
        if nID>0:
            self._Output["事件记录"] = np.r_[self._Output["事件记录"], np.c_[IDs, [idt]*nID, np.zeros(shape=(nID, 1))]]
            self._Output["正常收益率"] = np.r_[self._Output["正常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常收益率"] = np.r_[self._Output["异常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常协方差"] = np.r_[self._Output["异常协方差"], np.full(shape=(nID, EventWindow, EventWindow), fill_value=np.nan)]
            EstStartInd = CurInd - self.EventPreWindow - self.EstWindow - 1
            Price = self._FactorTable.readData(dts=self._AllDTs[EstStartInd:CurInd+1], ids=IDs, factor_names=[self.PriceFactor]).iloc[0, :, :]
            Return = _calcReturn(Price.values, return_type=self.ReturnType)
            EstReturn = Return[:self.EstWindow]
            if self.EstSampleFilter:
                temp = self._FactorTable.readData(dts=self._AllDTs[EstStartInd+1:EstStartInd+self.EstWindow+1], ids=IDs, factor_names=self._FilterFactors)
                FilterMask = eval(self._CompiledIDFilterStr).values
            else:
                FilterMask = np.full(EstReturn.shape, fill_value=True)
            FilterMask = (FilterMask & pd.notnull(EstReturn))
            FilterMask = (FilterMask & (np.flipud(np.cumsum(np.flipud(FilterMask), axis=0))<=self.EstSampleLen))
            EstReturn[~FilterMask] = np.nan
            ExpectedReturn, Var = np.nanmean(EstReturn, axis=0), np.nanvar(EstReturn, axis=0, ddof=1)
            FilterMask = ((np.sum(FilterMask, axis=0)<self.EstSampleLen) | (Var<1e-6))
            ExpectedReturn[FilterMask] = np.nan
            Var[FilterMask] = np.nan
            self._Output["正常收益率"][-nID:, :] = ExpectedReturn.reshape((nID, 1)).repeat(EventWindow, axis=1)
            self._Output["异常收益率"][-nID:, :self.EventPreWindow+1] = (Return[self.EstWindow:] - ExpectedReturn).T
            CovMatrix = (np.eye(EventWindow)+np.ones((EventWindow, EventWindow))/self.EstSampleLen).reshape((1, EventWindow, EventWindow)).repeat(nID, axis=0)
            self._Output["异常协方差"][-nID:, :, :] = (CovMatrix.T*Var).T
        Mask = (self._Output["事件记录"][:, 2]<=self.EventPostWindow)
        if np.sum(Mask)==0: return 0
        IDs = self._Output["事件记录"][:, 0][Mask]
        RowPos, ColPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist(), (self._Output["事件记录"][Mask, 2]+self.EventPreWindow).astype(np.int)
        Price = self._FactorTable.readData(dts=[self._AllDTs[CurInd-1], idt], ids=sorted(set(IDs)), factor_names=[self.PriceFactor]).iloc[0, :, :].loc[:, IDs]
        self._Output["异常收益率"][RowPos, ColPos] = (_calcReturn(Price.values, return_type=self.ReturnType)[0] - self._Output["正常收益率"][RowPos, ColPos])
        return 0 
Example 41
Project: QuantStudio   Author: Scorpi000   File: IC.py   GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        self._Output["IC"] = pd.DataFrame(np.array(self._Output["IC"]).T, index=self._Output.pop("时点"), columns=list(self.LookBack))
        if self.FactorOrder=="升序": self._Output["IC"] = -self._Output["IC"]
        self._Output["统计数据"] = pd.DataFrame(index=self._Output["IC"].columns)
        self._Output["统计数据"]["IC平均值"] = self._Output["IC"].mean()
        nDT = pd.notnull(self._Output["IC"]).sum()
        self._Output["统计数据"]["标准差"] = self._Output["IC"].std()
        self._Output["统计数据"]["IC_IR"] = self._Output["统计数据"]["IC平均值"] / self._Output["统计数据"]["标准差"]
        self._Output["统计数据"]["t统计量"] = self._Output["统计数据"]["IC_IR"] * nDT**0.5
        self._Output["统计数据"]["胜率"] = (self._Output["IC"]>0).sum() / nDT
        return 0 
Example 42
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def standardizeRank(data, mask=None, cat_data=None, ascending=True, uniformization=True, perturbation=False, offset=0.5, other_handle='填充None'):
    """Rank 标准化"""
    if other_handle=="保持不变":
        StdData = np.copy(data)
    else:
        StdData = np.empty(data.shape,dtype='float')+np.nan
    if mask is None:
        mask = pd.isnull(StdData)
    if perturbation:
        UniqueData = data[pd.notnull(data)]
        if UniqueData.shape[0]>0:
            UniqueData = np.sort(pd.unique(UniqueData))
            MinDiff = np.min(np.abs(np.diff(UniqueData)))
            data = data+np.random.rand(data.shape[0])*MinDiff*0.01
    CatMasks = maskCategary(data.shape[0],cat_data=cat_data,mask=mask)
    for jCat,jCatMask in CatMasks.items():
        jData = data[jCatMask]
        jNotNaMask = pd.notnull(jData)
        if ascending:
            jRank = np.argsort(np.argsort(jData[jNotNaMask]))
        else:
            jRank = np.argsort(np.argsort(-jData[jNotNaMask]))
        if uniformization:
            jRank = (jRank.astype('float')+offset)/jRank.shape[0]
        else:
            jRank = jRank.astype('float')
        jData[jNotNaMask] = jRank
        StdData[jCatMask] = jData
    return StdData

# Quantile Transformation standardization
# data: data to standardize, array; cat_data: category data, array
# ascending: whether to rank in ascending order, options: True, False
Example 43
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def standardizeDynamicPeer(data, corr_matrix, mask=None, cat_data=None, n_group=10, other_handle='填充None'):
    """动态分组标准化"""
    if mask is None:
        mask = (np.zeros(data.shape)==0)
    if other_handle=="保持不变":
        StdData = np.copy(data)
    else:
        StdData = np.empty(data.shape,dtype='float')+np.nan
    for j in range(data.shape[0]):
        if not mask[j]:
            continue
        jPeerCorr = corr_matrix[j,:]
        jNum = min((n_group,np.sum(jPeerCorr>0.0)))
        jData = None
        if jNum>=2:
            jPeerInds = np.argsort(-jPeerCorr)[:jNum]
            jData = data[jPeerInds]
            if np.sum(pd.notnull(jData))<2:
                jData = None
        if jData is None:
            if cat_data is not None:
                jCat = cat_data[j]
                if pd.notnull(jCat):
                    jCatMask = (cat_data==jCat)
                else:
                    jCatMask = pd.isnull(cat_data)
                jData = data[jCatMask]
            else:
                jData = data
        jStd = np.nanstd(jData)
        jAvg = np.nanmean(jData)
        if jStd==0:
            StdData[j] = 0.0
        else:
            StdData[j] = (data[j]-jAvg)/jStd
    return StdData

# Fill missing values with earlier values
# data: data to fill, array; dts: datetime series, array; lookback: if dts is given, the lookback window in seconds; otherwise the number of periods to look back
Example 44
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py   GNU General Public License v3.0
def fillNaNByRegress(Y, X, mask=None, cat_data=None, constant=False, dummy_data=None, drop_dummy_na=False):
    StdData = np.copy(Y)
    if mask is None:
        mask = (np.zeros(Y.shape)==0)
    CatMasks = maskCategary(Y.shape[0], cat_data=cat_data, mask=mask)
    for iCat,iCatMask in CatMasks.items():
        iY = Y[iCatMask]
        iNAMask = pd.isnull(iY)
        iNANum = np.sum(iNAMask)
        if iNANum==0:
            continue
        iX = (X[iCatMask] if X is not None else X)
        iDummy = (dummy_data[iCatMask] if dummy_data is not None else dummy_data)
        iXNotNAMask,_,_,iXX = prepareRegressData(np.ones(iY.shape[0]), iX, has_constant=constant, dummy_data=iDummy,drop_dummy_na=drop_dummy_na)
        iYY = iY[iXNotNAMask]
        iRegressMask = pd.notnull(iYY)
        if np.sum(iRegressMask)<2:
            continue
        iRslt = sm.OLS(iYY[iRegressMask],iXX[iRegressMask],missing='drop').fit()
        iBeta = iRslt.params
        iX = np.zeros((iY.shape[0],iBeta.shape[0]))+np.nan
        iX[iXNotNAMask] = iXX
        iY_hat = np.sum(iX*iBeta,axis=1)
        iY[iNAMask] = iY_hat[iNAMask]
        StdData[iCatMask] = iY
    return StdData
# Outlier handling: values beyond a given multiple of the standard deviation are replaced using that multiple
# data: data to process, array; std_multiplier: standard-deviation multiple, double
# method: handling method, options: 截断 (truncate), 丢弃 (drop), 变换 (transform); std_tmultiplier: standard-deviation multiple used when method is 变换, double
Example 45
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py   GNU General Public License v3.0
def calcPortfolioReturn(portfolio, return_rate):
    PortfolioReturn = 0
    for iID in portfolio:
        iRetRate = return_rate.get(iID)
        if pd.notnull(iRetRate):
            PortfolioReturn += portfolio[iID]*return_rate[iID]
        else:
            PortfolioReturn += portfolio[iID]*(-1)
    return PortfolioReturn
# Compute the return series; wealth_seq: net-value series, array; init_wealth: initial wealth, if None the first element of wealth_seq is used
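
A hypothetical call to calcPortfolioReturn above (made-up weights and returns); note that an ID with a missing return contributes its weight times -1:

portfolio = {'A': 0.6, 'B': 0.4}
return_rate = {'A': 0.05}                    # 'B' has no return, so it is treated as a total loss
calcPortfolioReturn(portfolio, return_rate)  # 0.6*0.05 + 0.4*(-1) = -0.37
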
Example 46
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py   GNU General Public License v3.0
def calcBeta(wealth_seq, market_wealth_seq):
    YieldSeq = calcYieldSeq(wealth_seq)
    MarketYieldSeq = calcYieldSeq(market_wealth_seq)
    Mask = (pd.notnull(MarketYieldSeq) & pd.notnull(YieldSeq))
    return np.cov(YieldSeq[Mask],MarketYieldSeq[Mask])[0,1]/np.nanvar(MarketYieldSeq)
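
For reference, the same beta formula can be reproduced on plain arrays. A small self-contained sketch with made-up data; since calcYieldSeq is not shown here, simple returns are computed inline (an assumption about what it does):

import numpy as np
import pandas as pd

wealth = np.array([1.00, 1.02, 0.99, 1.05])
market = np.array([1.00, 1.01, 1.00, 1.03])
ret, mret = wealth[1:] / wealth[:-1] - 1, market[1:] / market[:-1] - 1
mask = (pd.notnull(ret) & pd.notnull(mret))
# np.cov normalizes by N-1 while np.nanvar defaults to N, mirroring the function above
beta = np.cov(ret[mask], mret[mask])[0, 1] / np.nanvar(mret)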
# Calculate the Lower Partial Moment of returns, wealth_seq: net-value sequence, array 
Example 47
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def calcHPM(wealth_seq, threshold=0.0, order=2):
    YieldSeq = calcYieldSeq(wealth_seq)
    # This method returns a higher partial moment of the returns
    # Create an array with the same length as YieldSeq containing the minimum return threshold
    ThresholdArray = np.empty(YieldSeq.shape[0])
    ThresholdArray.fill(threshold)
    # Calculate the difference between the threshold and the returns
    Diff = YieldSeq - ThresholdArray
    # Set the minimum of each to 0
    Diff = Diff.clip(min=0)
    # Return the sum of the differences raised to the power of order, divided by the number of non-null returns
    return np.nansum(Diff ** order) / np.sum(pd.notnull(YieldSeq))
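
A short worked check on assumed returns (bypassing calcYieldSeq): with returns [0.05, -0.02, 0.03] and threshold 0, only the gains above the threshold contribute, so the order-2 value is (0.05**2 + 0.03**2) / 3:

import numpy as np
import pandas as pd

yields = np.array([0.05, -0.02, 0.03])
diff = (yields - 0.0).clip(min=0)            # negative deviations are set to 0
hpm2 = np.nansum(diff ** 2) / np.sum(pd.notnull(yields))   # ≈ 0.001133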
# Calculate the Sharpe ratio, wealth_seq: net-value sequence, array 
Example 48
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def summaryStrategy(wealth_seq, dts, dt_ruler=None, init_wealth=None):
    nCol = (wealth_seq.shape[1] if wealth_seq.ndim>1 else 1)
    if nCol==1: wealth_seq = wealth_seq.reshape((wealth_seq.shape[0], 1))
    wealth_seq, dts = _densifyWealthSeq(wealth_seq, dts, dt_ruler)
    YieldSeq = calcYieldSeq(wealth_seq, init_wealth)
    if init_wealth is None: init_wealth = wealth_seq[0]
    StartDT, EndDT = dts[0], dts[-1]
    SummaryIndex = ['起始时点', '结束时点']
    SummaryData = [np.array([StartDT]*nCol), np.array([EndDT]*nCol)]
    SummaryIndex.append('时点数')
    SummaryData.append(np.zeros(nCol) + len(dts))
    SummaryIndex.append('总收益率')
    SummaryData.append(wealth_seq[-1] / init_wealth - 1)
    SummaryIndex.append('年化收益率')
    SummaryData.append(calcAnnualYield(wealth_seq, start_dt=StartDT, end_dt=EndDT))
    SummaryIndex.append('年化波动率')
    SummaryData.append(calcAnnualVolatility(wealth_seq, start_dt=StartDT, end_dt=EndDT))
    SummaryIndex.append('Sharpe比率')
    SummaryData.append(SummaryData[4] / SummaryData[5])
    SummaryIndex.append('胜率')
    SummaryData.append(np.sum(YieldSeq>=0, axis=0) / np.sum(pd.notnull(YieldSeq), axis=0))
    SummaryIndex.extend(("最大回撤率", "最大回撤开始时点", "最大回撤结束时点"))
    MaxDrawdownRate, MaxDrawdownStartDT, MaxDrawdownEndDT = [], [], []
    for i in range(nCol):
        iMaxDrawdownRate, iMaxDrawdownStartPos, iMaxDrawdownEndPos = calcMaxDrawdownRate(wealth_seq=wealth_seq[:, i])
        MaxDrawdownRate.append(np.abs(iMaxDrawdownRate))
        MaxDrawdownStartDT.append((dts[iMaxDrawdownStartPos] if iMaxDrawdownStartPos is not None else None))
        MaxDrawdownEndDT.append((dts[iMaxDrawdownEndPos] if iMaxDrawdownEndPos is not None else None))
    SummaryData.extend((np.array(MaxDrawdownRate), np.array(MaxDrawdownStartDT), np.array(MaxDrawdownEndDT)))
    return pd.DataFrame(SummaryData, index=SummaryIndex)
# Calculate the return for each year, wealth_seq: net-value sequence; dts: datetime sequence; dt_ruler: datetime ruler 
Example 49
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def loadCSVFilePortfolioSignal(csv_path):
    FileSignals = {}
    if not os.path.isfile(csv_path): raise __QS_Error__("文件: '%s' 不存在" % csv_path)
    with open(csv_path) as CSVFile:
        FirstLine = CSVFile.readline()
    if len(FirstLine.split(","))!=3:# horizontal layout
        CSVDF = readCSV2Pandas(csv_path,detect_file_encoding=True)
        temp = list(CSVDF.columns)
        nCol = len(temp)
        AllSignalDates = [str(int(temp[i])) for i in range(0,nCol,2)]
        for i in range(int(nCol/2)):
            iDT = CSVDF.columns[i*2]
            iSignal = CSVDF.iloc[:,i*2:i*2+2]
            iSignal = iSignal[pd.notnull(iSignal.iloc[:,1])].set_index([iDT]).iloc[:,0]
            FileSignals[AllSignalDates[i]] = iSignal
    else:# vertical layout
        CSVDF = readCSV2Pandas(csv_path,detect_file_encoding=True,header=0)
        AllSignalDates = pd.unique(CSVDF.iloc[:,0])
        AllColumns = list(CSVDF.columns)
        for iDT in AllSignalDates:
            iSignal = CSVDF.iloc[:, 1:][CSVDF.iloc[:,0]==iDT]
            iSignal = iSignal.set_index(AllColumns[1:2])
            iSignal = iSignal[AllColumns[2]]
            FileSignals[str(iDT)] = iSignal
    return FileSignals
# Write portfolio signals to a CSV file 
Example 50
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def genRandomPortfolio(ids, target_num=20, weight=None):
    IDs = np.random.choice(np.array(ids),target_num,replace=False)
    IDs.sort()
    if weight is None:
        return pd.Series(1/IDs.shape[0],index=IDs)
    Portfolio = weight[IDs]
    Portfolio = Portfolio[pd.notnull(Portfolio) & (Portfolio!=0)]
    return Portfolio/Portfolio.sum()
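
A minimal usage sketch (the ID universe and weights are assumptions):

import numpy as np
import pandas as pd

ids = ["%06d.SZ" % i for i in range(100)]
weight = pd.Series(np.random.rand(100), index=ids)
port = genRandomPortfolio(ids, target_num=20, weight=weight)
print(port.sum())    # the surviving weights are renormalized to sum to 1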
# Form a portfolio by screening 
Example 51
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py    GNU General Public License v3.0 5 votes vote down vote up
def plotHist(self):# histogram
        SelectedColumn = self.getSelectedColumns()
        if len(SelectedColumn)!=1: return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        SelectedDF,Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        SelectedDF = SelectedDF.iloc[:,0]
        GroupNum, isOK = QtWidgets.QInputDialog.getInt(self, '获取分组数', '分组数', value=10, min=1, max=1000, step=1)
        if not isOK: return 0
        yData = SelectedDF[pd.notnull(SelectedDF)].values
        xData = np.linspace(np.nanmin(yData),np.nanmax(yData),yData.shape[0]*10)
        yNormalData = stats.norm.pdf(xData,loc=np.nanmean(yData),scale=np.nanstd(yData))
        GraphObj = [plotly.graph_objs.Histogram(x=yData,histnorm='probability',name='直方图',nbinsx=GroupNum),plotly.graph_objs.Scatter(x=xData,y=yNormalData,name='Normal Distribution',line={'color':'rgb(255,0,0)','width':2})]
        with tempfile.TemporaryFile() as File:
            plotly.offline.plot({"data":GraphObj,"layout": plotly.graph_objs.Layout(title="直方图")}, filename=File.name)
        return 0 
Example 52
Project: QuantStudio   Author: Scorpi000   File: DataTypeConversionFun.py    GNU General Public License v3.0 5 votes vote down vote up
def Series2DataFrame(s,default_na=None):
    s_df = s.reset_index()
    Index = s_df.iloc[:,0].unique()
    Columns = s_df.iloc[:,1].unique()
    NewData = pd.DataFrame(index=Index,columns=Columns)
    NewData = NewData.where(pd.notnull(NewData),default_na)
    for iIndex in Index:
        iS = s.loc[iIndex]
        NewData.loc[iIndex,iS.index] = iS
    return NewData 
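
A minimal usage sketch (assumed toy data): the first index level becomes the rows, the second becomes the columns, and cells absent from the Series are filled with default_na:

import pandas as pd

s = pd.Series([1, 2, 3],
              index=pd.MultiIndex.from_tuples([("2020-01-01", "A"),
                                               ("2020-01-01", "B"),
                                               ("2020-01-02", "A")]))
df = Series2DataFrame(s, default_na=0)
# rows 2020-01-01 / 2020-01-02, columns A / B; the missing ("2020-01-02", "B") cell is 0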
Example 53
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 5 votes vote down vote up
def regressWithOneLinearEqConstraint(y, x, weight=None, Aeq=None, beq=None, statistics=False):
    Mask = ((np.sum(np.isnan(x), axis=1)==0) & (pd.notnull(y)))
    if weight is not None:
        Mask = (Mask & pd.notnull(weight))
    else:
        weight = np.ones(y.shape)
    x = x[Mask,:]
    if x.shape[0]<=1: return None
    y = y[Mask]
    weight = weight[Mask]
    if (Aeq is not None) and (beq is not None):
        NonZeroInd = np.arange(0,Aeq.shape[0])[Aeq!=0]
        if NonZeroInd.shape[0]==0: return None
        NonZeroInd = NonZeroInd[0]
        yy = y-x[:,NonZeroInd]*beq/Aeq[NonZeroInd]
        if NonZeroInd==0:
            xx = -np.dot(x[:,NonZeroInd:NonZeroInd+1],Aeq[NonZeroInd+1:].reshape((1,Aeq.shape[0]-1-NonZeroInd))/Aeq[NonZeroInd])+x[:,1+NonZeroInd:]
        elif NonZeroInd==x.shape[1]-1:
            xx = x[:,:NonZeroInd]-np.dot(x[:,NonZeroInd:NonZeroInd+1],Aeq[:NonZeroInd].reshape((1,NonZeroInd))/Aeq[NonZeroInd])
        else:
            xx = np.hstack((x[:,:NonZeroInd]-np.dot(x[:,NonZeroInd:NonZeroInd+1],Aeq[:NonZeroInd].reshape((1,NonZeroInd))/Aeq[NonZeroInd]),-np.dot(x[:,NonZeroInd:NonZeroInd+1],Aeq[NonZeroInd+1:].reshape((1,Aeq.shape[0]-1-NonZeroInd))/Aeq[NonZeroInd])+x[:,1+NonZeroInd:]))
        Result = sm.WLS(yy,xx,weights=weight).fit()
        beta = np.zeros(x.shape[1])
        beta[NonZeroInd] = (beq-np.sum(Result.params*np.append(Aeq[:NonZeroInd],Aeq[NonZeroInd+1:])))/Aeq[NonZeroInd]
        beta[:NonZeroInd] = Result.params[:NonZeroInd]
        beta[NonZeroInd+1:] = Result.params[NonZeroInd:]
    else:
        Result = sm.WLS(y,x,weights=weight).fit()
        beta = Result.params
    if not statistics: return beta
    Statistics = {"R2":1-np.sum(weight*(y-np.dot(x,beta))**2)/np.sum(weight*y**2)}
    Statistics["R2_adj"] = 1-(1-Statistics["R2"])*(y.shape[0]-1)/(y.shape[0]-beta.shape[0]-1+((Aeq is not None) and (beq is not None)))
    return (beta, Statistics)
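
A minimal usage sketch on synthetic data (an assumption), estimating two coefficients under the constraint that they sum to one:

import numpy as np

np.random.seed(0)
x = np.random.randn(200, 2)
y = 0.3 * x[:, 0] + 0.7 * x[:, 1] + 0.01 * np.random.randn(200)
Aeq, beq = np.array([1.0, 1.0]), 1.0      # constraint: beta_0 + beta_1 == 1
beta = regressWithOneLinearEqConstraint(y, x, Aeq=Aeq, beq=beq)
# beta should be close to [0.3, 0.7] and sum to exactly 1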

# Calculate regression weights 
Example 54
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 5 votes vote down vote up
def calcMarketReturn(ret, weight=None):
    Mask = pd.notnull(ret)
    if weight is not None:
        Mask = (Mask & pd.notnull(weight))
    else:
        weight = pd.Series(1.0,index=ret.index)
    ret = ret[Mask]
    weight = weight[Mask]
    return (ret*weight).sum()/weight.sum()
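
A minimal usage sketch (assumed IDs and values): entries with a missing return are excluded from both the numerator and the weight sum:

import pandas as pd

ret = pd.Series({"000001.SZ": 0.01, "600000.SH": 0.03, "000002.SZ": float("nan")})
weight = pd.Series({"000001.SZ": 2.0, "600000.SH": 1.0, "000002.SZ": 5.0})
# (0.01*2 + 0.03*1) / (2 + 1) ≈ 0.0167
print(calcMarketReturn(ret, weight))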

# Calculate the extreme part of residual returns, resid_ret: residual returns, array 
Example 55
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 5 votes vote down vote up
def VolatilityRegimeAdjustment(ret, forcast_volitility, half_life=90, forcast_num=21):
    BiasStats = pd.Series(np.nan,index=forcast_volitility.index)
    for i,iDate in enumerate(forcast_volitility.index):
        iInd = (ret.index<=iDate).sum()-1
        if forcast_num>0:
            if ret.shape[0]>=iInd+forcast_num+1:
                iRet = (1+ret.iloc[iInd+1:iInd+forcast_num+1]).prod()-1
            else:
                continue
        else:
            if i==forcast_volitility.shape[0]-1:
                continue
            else:
                iNextDate = forcast_volitility.index[i+1]
                iNextInd = (ret.index<=iNextDate).sum()-1
                iRet = (1+ret.iloc[iInd+1:iNextInd+1]).prod()-1
        iTemp = (iRet/forcast_volitility.loc[iDate])**2
        BiasStats.loc[iDate] = iTemp.sum()/pd.notnull(iTemp).sum()
    BiasStats = BiasStats[pd.notnull(BiasStats)]
    Weight = getExpWeight(ret.shape[0],half_life=half_life,is_unitized=False)
    Weight.reverse()
    Weight = pd.Series(Weight,index=ret.index)
    Weight = Weight[BiasStats.index]/Weight[BiasStats.index].sum()
    return (Weight*BiasStats**2).sum()**0.5

# Remove missing values from the risk matrix 
Example 56
Project: QuantStudio   Author: Scorpi000   File: TushareDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_initArgs__(self):
        super().__QS_initArgs__()
        FactorInfo = self._FactorDB._FactorInfo.loc[self.Name]
        ConditionField = FactorInfo[pd.notnull(FactorInfo["Supplementary"])]
        for i, iCondition in enumerate(ConditionField.index):
            self.add_trait("Condition"+str(i), Enum(*ConditionField["Supplementary"].iloc[i].split(","), arg_type="String", label=iCondition, order=i)) 
Example 57
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _getIDField(self):
        if (self._MainTableName is None) or (self._MainTableName==self._DBTableName):
            if _identifyDataType(self._FactorInfo["DataType"][self._FactorInfo["FieldType"]=="ID"].iloc[0])=="double":
                RawIDField = "CAST("+self._DBTableName+"."+self._IDField+" AS CHAR)"
            else:
                RawIDField = self._DBTableName+"."+self._IDField
        else:
            RawIDField = self._MainTableName+"."+self._MainTableID
        DefaultSuffix = self._FactorDB._TableInfo.loc[self.Name, "DefaultSuffix"]
        Exchange = self._FactorDB._TableInfo.loc[self.Name, "Exchange"]
        if pd.isnull(Exchange):
            if pd.notnull(DefaultSuffix): RawIDField = "CONCAT("+RawIDField+", '"+DefaultSuffix+"')"
            return RawIDField
        ExchangeField, ExchangeCodes = Exchange.split(":")
        if self._MainTableName is None:
            ExchangeField = self._DBTableName + "." + ExchangeField
        else:
            ExchangeField = self._MainTableName + "." + ExchangeField
        ExchangeCodes = ExchangeCodes.split(",")
        ExchangeInfo = self._FactorDB._ExchangeInfo
        IDField = "CASE "+ExchangeField+" "
        for iCode in ExchangeCodes:
            IDField += "WHEN "+iCode+" THEN CONCAT("+RawIDField+", '"+ExchangeInfo.loc[iCode, "Suffix"]+"') "
        if pd.isnull(DefaultSuffix):
            IDField += "ELSE "+RawIDField+" END"
        else:
            IDField += "ELSE CONCAT("+RawIDField+", '"+DefaultSuffix+"') END"
        return IDField 
Example 58
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _getSecuMainIDField(self):
        ExchangeInfo = self._FactorDB._ExchangeInfo
        IDField = "CASE SecuMarket "
        for iCode in ExchangeInfo[pd.notnull(ExchangeInfo["Suffix"])].index:
            IDField += "WHEN "+iCode+" THEN CONCAT(SecuCode, '"+ExchangeInfo.loc[iCode, "Suffix"]+"') "
        IDField += "ELSE SecuCode END"
        return IDField 
Example 59
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def FactorNames(self):
        return self._FactorInfo[pd.notnull(self._FactorInfo["FieldType"])].index.tolist() 
Example 60
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def getID(self, ifactor_name=None, idt=None, args={}):
        SQLStr = "SELECT DISTINCT "+self._getIDField()+" AS ID "
        SQLStr += self._genFromSQLStr()+" "
        SQLStr += "WHERE "+self._DBTableName+"."+self._IDField+" IS NOT NULL "
        if pd.notnull(self._MainTableCondition): SQLStr += "AND "+self._MainTableCondition+" "
        SQLStr += self._genConditionSQLStr(args=args)+" "
        SQLStr += "ORDER BY ID"
        return [str(iRslt[0]) for iRslt in self._FactorDB.fetchall(SQLStr)] 
Example 61
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def getDateTime(self, ifactor_name=None, iid=None, start_dt=None, end_dt=None, args={}):
        SQLStr = "SELECT MIN("+self._DBTableName+"."+self._StartDateField+") "# start date
        if iid is not None:
            SQLStr += self._genFromSQLStr()+" "
            SQLStr += "WHERE "+self._MainTableName+"."+self._MainTableID+"='"+deSuffixID([iid])[0]+"' "
            if pd.notnull(self._MainTableCondition): SQLStr += "AND "+self._MainTableCondition+" "
        else:
            SQLStr += "FROM "+self._DBTableName+" "
            SQLStr += "WHERE "+self._DBTableName+"."+self._IDField+" IS NOT NULL "
        SQLStr += self._genConditionSQLStr(args=args)
        StartDT = dt.datetime.strptime(self._FactorDB.fetchall(SQLStr)[0][0], "%Y-%m-%d")
        if start_dt is not None: StartDT = max((StartDT, start_dt))
        if end_dt is None: end_dt = dt.datetime.combine(dt.date.today(), dt.time(0))
        return getDateTimeSeries(start_dt=StartDT, end_dt=end_dt, timedelta=dt.timedelta(1)) 
Example 62
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def _calcMultiMappingData(self, raw_data, factor_names, ids, dts, args={}):
        Data, nDT, nFactor = {}, len(dts), len(factor_names)
        raw_data.set_index(["ID"], inplace=True)
        raw_data["QS_结束日"] = raw_data["QS_结束日"].where(pd.notnull(raw_data["QS_结束日"]), dts[-1]+dt.timedelta(1))
        if args.get("只填起始日", self.OnlyStartFilled):
            raw_data["QS_起始日"] = raw_data["QS_起始日"].where(raw_data["QS_起始日"]>=dts[0], dts[0])
            for iID in raw_data.index.unique():
                iRawData = raw_data.loc[[iID]].set_index(["QS_起始日"])
                iData = pd.DataFrame([([],)*nFactor]*nDT, index=dts, columns=factor_names, dtype="O")
                for jStartDate in iRawData.index.drop_duplicates():
                    iData.iloc[iData.index.searchsorted(jStartDate)] += pd.Series(iRawData.loc[[jStartDate], factor_names].values.T.tolist(), index=factor_names)
                Data[iID] = iData
            return pd.Panel(Data).swapaxes(0, 2).loc[:, :, ids]
        else:
            DeltaDT = dt.timedelta(int(not self._EndDateIncluded))
            for iID in raw_data.index.unique():
                iRawData = raw_data.loc[[iID]].set_index(["QS_起始日", "QS_结束日"])
                iData = pd.DataFrame([([],)*nFactor]*nDT, index=dts, columns=factor_names, dtype="O")
                for jStartDate, jEndDate in iRawData.index.drop_duplicates():
                    ijRawData = iRawData.loc[jStartDate].loc[[jEndDate], factor_names].values.T.tolist()
                    if pd.isnull(jEndDate) or (jEndDate<jStartDate):
                        ijOldData = iData.loc[jStartDate:]
                        iData.loc[jStartDate:] += pd.DataFrame([ijRawData] * ijOldData.shape[0], index=ijOldData.index, columns=ijOldData.columns, dtype="O")
                    else:
                        jEndDate -= DeltaDT
                        ijOldData = iData.loc[jStartDate:jEndDate]
                        iData.loc[jStartDate:jEndDate] += pd.DataFrame([ijRawData] * ijOldData.shape[0], index=ijOldData.index, columns=ijOldData.columns, dtype="O")
                Data[iID] = iData
            return pd.Panel(Data).swapaxes(0, 2).loc[:, :, ids] 
Example 63
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_initArgs__(self):
        super().__QS_initArgs__()
        DateFields = self._FactorInfo[self._FactorInfo["FieldType"]=="Date"].index.tolist()# list of all date fields
        self.add_trait("DateField", Enum(*DateFields, arg_type="SingleOption", label="日期字段", order=3))
        iFactorInfo = self._FactorInfo[(self._FactorInfo["FieldType"]=="Date") & pd.notnull(self._FactorInfo["Supplementary"])]
        iFactorInfo = iFactorInfo[iFactorInfo["Supplementary"].str.contains("Default")]
        if iFactorInfo.shape[0]>0: self.DateField = iFactorInfo.index[0]
        else: self.DateField = DateFields[0]
    # Return the IDs that have data records at the given datetime idt
    # If idt is None, return all IDs that have any historical data record
    # ifactor_name is ignored 
Example 64
Project: gullikson-scripts   Author: kgullikson88   File: Sensitivity.py    MIT License 4 votes vote down vote up
def get_companions(starname, sep_max=1.5):
    """
    Find companions to the given star, with maximum separation given
    by the `sep_max` keyword.
    """
    data = StarData.GetData(starname, safe_spt=True)

    # Search for the given star in the database
    match = multiples.loc[multiples.main_id == data.main_id]
    print('{} matches with the same name'.format(len(match)))
    if len(match) < 1:
        return return_primary(data)

    # Now, only keep close companions
    match = match.loc[(match.separation < sep_max) | (match.separation.isnull())]
    print('{} matches that are within {}"'.format(len(match), sep_max))
    if len(match) < 1:
        return return_primary(data)

    # Finally, only keep stars we can figure something out with
    match = match.loc[((match.Sp1.notnull()) & (match.mag1.notnull()) & match.mag2.notnull()) | (
    (match.K1.notnull()) & match.K2.notnull())]
    print('{} matches with sufficient information'.format(len(match)))
    if len(match) < 1:
        return return_primary(data)

    # Get the spectral type for each match
    match['Sp2'] = match.apply(get_sec_spt, axis=1)

    # Only keep the companions that are early type for this
    match = match.loc[(match.Sp2.str.startswith('O')) | (match.Sp2.str.startswith('B'))
                      | (match.Sp2.str.startswith('A')) | (match.Sp2.str.startswith('F'))]
    print('{} matches with early type companions'.format(len(match)))
    if len(match) < 1:
        return return_primary(data)


    # Get the temperature, mass, and radius of the companions
    # Split by the components in the system
    match = match.fillna('AB')
    components = split_by_component(match.copy())

    # Fix spectral type
    components['SpT'] = components['SpT'].map(lambda s: s.replace('m', '5'))
    print(components)
    print(components['SpT'])
    components['companion_mass'] = components['SpT'].map(lambda s: MS.Interpolate('mass', s))
    components['companion_teff'] = components['SpT'].map(lambda s: MS.Interpolate('temperature', s))
    components['companion_radius'] = components['SpT'].map(lambda s: MS.Interpolate('radius', s))

    retdict = {'temperature': list(components['companion_teff']),
               'mass': list(components['companion_mass']),
               'radius': list(components['companion_radius'])}
    return retdict 
Example 65
Project: matchminer-engine   Author: dfci   File: sort.py    GNU Affero General Public License v3.0 4 votes vote down vote up
def sort_by_tier(match, sort_order):
    """
    Highest priority sorting
    """

    idx = (match['sample_id'], match['protocol_no'])

    if 'mmr_status' in match and pd.notnull(match['mmr_status']):
        sort_order[idx] = add_sort_value(sort_value=0,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'tier' in match and match['tier'] == 1:
        sort_order[idx] = add_sort_value(sort_value=1,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'tier' in match and match['tier'] == 2:
        sort_order[idx] = add_sort_value(sort_value=2,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'variant_category' in match and match['variant_category'] == 'CNV':
        sort_order[idx] = add_sort_value(sort_value=3,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'tier' in match and match['tier'] == 3:
        sort_order[idx] = add_sort_value(sort_value=4,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'tier' in match and match['tier'] == 4:
        sort_order[idx] = add_sort_value(sort_value=5,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    elif 'wildtype' in match and match['wildtype'] is True:
        sort_order[idx] = add_sort_value(sort_value=6,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    else:
        sort_order[idx] = add_sort_value(sort_value=7,
                                         priority=0,
                                         sort_order_li=sort_order[idx])

    return sort_order 
Example 66
Project: recordlinkage   Author: J535D165   File: encoding.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def phonetic(s, method, concat=True, encoding='utf-8', decode_error='strict'):
    """Convert names or strings into phonetic codes.

    The implemented algorithms are `soundex
    <https://en.wikipedia.org/wiki/Soundex>`_, `nysiis
    <https://en.wikipedia.org/wiki/New_York_State_Identification_and_
    Intelligence_System>`_, `metaphone
    <https://en.wikipedia.org/wiki/Metaphone>`_ or  `match_rating
    <https://en.wikipedia.org/wiki/Match_rating_approach>`_.

    Parameters
    ----------
    s : pandas.Series
        A pandas.Series with string values (often names) to encode.
    method: str
        The algorithm that is used to phonetically encode the values.
        The possible options are "soundex", "nysiis", "metaphone" or
        "match_rating".
    concat: bool, optional
        Remove whitespace before phonetic encoding.
    encoding: str, optional
        If bytes are given, this encoding is used to decode. Default
        is 'utf-8'.
    decode_error: {'strict', 'ignore', 'replace'}, optional
        Instruction on what to do if a byte Series is given that
        contains characters not of the given `encoding`. By default,
        it is 'strict', meaning that a UnicodeDecodeError will be
        raised. Other values are 'ignore' and 'replace'.

    Returns
    -------
    pandas.Series
        A Series with phonetic encoded values.

    """

    # encoding
    if sys.version_info[0] == 2:
        s = s.apply(
            lambda x: x.decode(encoding, decode_error)
            if type(x) == bytes else x)

    if concat:
        s = s.str.replace(r"[\-\_\s]", "")

    for alg in _phonetic_algorithms:
        if method in alg['argument_names']:
            phonetic_callback = alg['callback']
            break
    else:
        raise ValueError("The algorithm '{}' is not known.".format(method))

    return s.str.upper().apply(
        lambda x: phonetic_callback(x) if pandas.notnull(x) else np.nan
    ) 
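
A minimal usage sketch, assuming the function is imported from recordlinkage.preprocessing (its usual public location in that package):

import numpy as np
import pandas as pd
from recordlinkage.preprocessing import phonetic

names = pd.Series(["Jonathan Smith", "Jon Smyth", np.nan])
codes = phonetic(names, method="soundex")   # NaN entries stay NaN instead of raising
print(codes)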
Example 67
Project: msdas   Author: cokelaer   File: yeast.py    GNU General Public License v3.0 4 votes vote down vote up
def pcolor_na(self, raw=False, fontsize=8):
        """Plot the number of NAs for proteins that are in the small data set

        The final number of rows is therefore larger than in the small data set.

        The overlap between the small and raw data is 32 rows; the remaining 25 are combinations from the
        raw data set.

        .. plot::
            :include-source:
            :width: 80%

            from msdas import *
            import pylab
            y  = YEAST2MIDAS(get_yeast_small_data(), get_yeast_raw_data())
            y.cleanup_june()
            errors = y.pcolor_na()

        """
        proteins = list(set(self.df.Protein))
        r = self.replicates
        psites = r.metadata.ix[r.metadata.query("Protein in proteins", engine="python").index].Identifier
        tags = self._measures

        NAs = {}
        for tag in tags:
            df = r.get_replicates_from_one_unique_measurement(tag)
            nas = 3 - pd.notnull(df.ix[psites.index]).sum(axis=1)
            NAs[tag] = nas.values

        NAs = pd.DataFrame(NAs, index=psites)
        NAs = NAs[self._measures]
        NAs = NAs.ix[sorted(NAs.index)]
        pylab.clf()
        pylab.pcolor(NAs)
        N, M = NAs.shape

        pylab.ylim([0,N])
        pylab.xlim(0, M)
        pylab.xticks([0.5 +x for x in range(0,M)], NAs.columns, rotation=90, fontsize=fontsize)
        pylab.yticks([0.5+x for x in range(0,N)], NAs.index, fontsize=fontsize)
        pylab.colorbar()
        pylab.tight_layout()
        return NAs 
Example 68
Project: QuantStudio   Author: Scorpi000   File: BrinsonModel.py    GNU General Public License v3.0 4 votes vote down vote up
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        self._Output["策略组合资产权重"].where(pd.notnull(self._Output["策略组合资产权重"]), 0.0, inplace=True)
        self._Output["基准组合资产权重"].where(pd.notnull(self._Output["基准组合资产权重"]), 0.0, inplace=True)
        self._Output["策略组合资产收益"].where(pd.notnull(self._Output["策略组合资产收益"]), 0.0, inplace=True)
        self._Output["基准组合资产收益"].where(pd.notnull(self._Output["基准组合资产收益"]), 0.0, inplace=True)
        self._Output["策略组合收益"] = self._Output["策略组合资产权重"] * self._Output["策略组合资产收益"]
        self._Output["基准组合收益"] = self._Output["基准组合资产权重"] * self._Output["基准组合资产收益"]
        self._Output["主动资产配置组合收益"] = self._Output["策略组合资产权重"] * self._Output["基准组合资产收益"]
        self._Output["主动个券选择组合收益"] = self._Output["基准组合资产权重"] * self._Output["策略组合资产收益"]
        self._Output["主动资产配置超额收益"] = self._Output["主动资产配置组合收益"] - self._Output["基准组合收益"]
        self._Output["主动个券选择超额收益"] = self._Output["主动个券选择组合收益"] - self._Output["基准组合收益"]
        self._Output["交互作用超额收益"] = self._Output["策略组合收益"] - self._Output["主动个券选择组合收益"] - self._Output["主动资产配置组合收益"] + self._Output["基准组合收益"] 
        self._Output["总超额收益"] = self._Output["策略组合收益"] - self._Output["基准组合收益"]
        self._Output["主动资产配置组合收益(修正)"] = (self._Output["策略组合资产权重"] - self._Output["基准组合资产权重"]) * (self._Output["基准组合资产收益"].T - self._Output["基准组合收益"].sum(axis=1)).T
        self._Output["总计"] = pd.DataFrame(self._Output["策略组合资产权重"].sum(axis=1), columns=["策略组合资产权重"])
        self._Output["总计"]["基准组合资产权重"] = self._Output["基准组合资产权重"].sum(axis=1)
        self._Output["总计"]["策略组合收益"] = self._Output["策略组合收益"].sum(axis=1)
        self._Output["总计"]["基准组合收益"] = self._Output["基准组合收益"].sum(axis=1)
        self._Output["总计"]["主动资产配置组合收益"] = self._Output["主动资产配置组合收益"].sum(axis=1)
        self._Output["总计"]["主动资产配置组合收益(修正)"] = self._Output["主动资产配置组合收益(修正)"].sum(axis=1)
        self._Output["总计"]["主动个券选择组合收益"] = self._Output["主动个券选择组合收益"].sum(axis=1)
        self._Output["总计"]["主动资产配置超额收益"] = self._Output["主动资产配置超额收益"].sum(axis=1)
        self._Output["总计"]["主动个券选择超额收益"] = self._Output["主动个券选择超额收益"].sum(axis=1)
        self._Output["总计"]["交互作用超额收益"] = self._Output["交互作用超额收益"].sum(axis=1)
        self._Output["总计"]["总超额收益"] = self._Output["总超额收益"].sum(axis=1)
        self._Output["多期综合"] = pd.DataFrame(dtype=np.float)
        self._Output["多期综合"]["策略组合收益"] = (self._Output["策略组合收益"] + 1).prod(axis=0) - 1
        self._Output["多期综合"]["基准组合收益"] = (self._Output["基准组合收益"] + 1).prod(axis=0) - 1
        self._Output["多期综合"]["主动资产配置组合收益"] = (self._Output["主动资产配置组合收益"] + 1).prod() - 1
        self._Output["多期综合"]["主动个券选择组合收益"] = (self._Output["主动个券选择组合收益"] + 1).prod() - 1
        self._Output["多期综合"]["主动资产配置超额收益"] = self._Output["多期综合"]["主动资产配置组合收益"] - self._Output["多期综合"]["基准组合收益"]
        self._Output["多期综合"]["主动个券选择超额收益"] = self._Output["多期综合"]["主动个券选择组合收益"] - self._Output["多期综合"]["基准组合收益"]
        self._Output["多期综合"]["交互作用超额收益"] = self._Output["多期综合"]["策略组合收益"] - self._Output["多期综合"]["主动资产配置组合收益"] - self._Output["多期综合"]["主动个券选择组合收益"] + self._Output["多期综合"]["基准组合收益"]
        self._Output["多期综合"]["总超额收益"] = self._Output["多期综合"]["策略组合收益"] - self._Output["多期综合"]["基准组合收益"]
        self._Output["多期综合"].loc["总计"] = (self._Output["总计"] + 1).prod(axis=0) - 1
        k_t = (np.log(1+self._Output["总计"]["策略组合收益"]) - np.log(1+self._Output["总计"]["基准组合收益"])) / (self._Output["总计"]["策略组合收益"] - self._Output["总计"]["基准组合收益"])
        k_t[pd.isnull(k_t)] = 1.0
        if self._Output["多期综合"].loc["总计", "策略组合收益"]!=self._Output["多期综合"].loc["总计", "基准组合收益"]:
            k = (np.log(self._Output["多期综合"].loc["总计", "策略组合收益"]+1) - np.log(self._Output["多期综合"].loc["总计", "基准组合收益"]+1)) / (self._Output["多期综合"].loc["总计", "策略组合收益"] - self._Output["多期综合"].loc["总计", "基准组合收益"])
        else:
            k = 1.0
        self._Output["多期综合"].loc["总计", "主动资产配置超额收益"] = (self._Output["总计"]["主动资产配置超额收益"] * k_t).sum() / k
        self._Output["多期综合"].loc["总计", "主动个券选择超额收益"] = (self._Output["总计"]["主动个券选择超额收益"] * k_t).sum() / k
        self._Output["多期综合"].loc["总计", "交互作用超额收益"] = (self._Output["总计"]["交互作用超额收益"] * k_t).sum() / k
        self._Output["多期综合"].loc["总计", "总超额收益"] = self._Output["多期综合"].loc["总计", "策略组合收益"] - self._Output["多期综合"].loc["总计", "基准组合收益"]
        return 0 
Example 69
Project: QuantStudio   Author: Scorpi000   File: BiasTest.py    GNU General Public License v3.0 4 votes vote down vote up
def _genPortfolio(self, idt, ids):
        PortfolioDict = OrderedDict()
        if self._WeightFactors:
            WeightData = self._FactorTable.readData(factor_names=self._WeightFactors, dts=[idt], ids=ids).iloc[:, 0]
        else:
            WeightData = pd.DataFrame()
        if self._HasEW:
            WeightData["等权"] = pd.Series(1, index=ids) / len(ids)
        if self.IndustryFactor!="无":
            Industry = self._FactorTable.readData(factor_names=[self.IndustryFactor], dts=[idt], ids=ids).iloc[0, 0]
            AllIndustries = Industry[pd.notnull(Industry)].unique()
            AllIndustries.sort()
        if self.StyleFactors:
            StyleFactorData = self._FactorTable.readData(factor_names=list(self.StyleFactors), dts=[idt], ids=ids).iloc[:, 0]
        if self.IndustryNeutralFactors:
            IndNeutralData = self._FactorTable.readData(factor_names=list(self.IndustryNeutralFactors), dts=[idt], ids=ids).iloc[:, 0]
        for iWeightFactor in WeightData:
            iWeightData = WeightData[iWeightFactor]
            iMask = (pd.notnull(iWeightData) & (iWeightData!=0))
            iWeightData = iWeightData[iMask]
            # portfolio over all IDs
            PortfolioDict["全体%s加权组合" % (iWeightFactor,)] = iWeightData / iWeightData.abs().sum()
            # industry portfolios
            if self.IndustryFactor!="无":
                for jIndustry in AllIndustries:
                    ijMask = (Industry[iMask]==jIndustry)
                    ijWeightData = iWeightData[ijMask]
                    PortfolioDict["%s行业%s加权组合" % (jIndustry, iWeightFactor)] = ijWeightData / ijWeightData.abs().sum()
                    # industry-neutral portfolios
                    for kFactor in self.IndustryNeutralFactors:
                        kTopPortfolio = ("%sTop%s加权组合" % (kFactor, iWeightFactor))
                        kBottomPortfolio = ("%sBottom%s加权组合" % (kFactor, iWeightFactor))
                        ijkIndNeutralData= IndNeutralData[kFactor][iMask][ijMask]
                        ijkThreshold = ijkIndNeutralData.median()
                        PortfolioDict[kTopPortfolio] = PortfolioDict.get(kTopPortfolio, []) + ijkIndNeutralData[ijkIndNeutralData>ijkThreshold].index.tolist()
                        PortfolioDict[kBottomPortfolio] = PortfolioDict.get(kBottomPortfolio, []) + ijkIndNeutralData[ijkIndNeutralData<=ijkThreshold].index.tolist()
                for kFactor in self.IndustryNeutralFactors:
                    kTopPortfolio = ("%sTop%s加权组合" % (kFactor, iWeightFactor))
                    kPortfolio = iWeightData.loc[PortfolioDict.pop(kTopPortfolio)]
                    PortfolioDict[kTopPortfolio] = kPortfolio / kPortfolio.abs().sum()
                    kBottomPortfolio = ("%sBottom%s加权组合" % (kFactor, iWeightFactor))
                    kPortfolio = iWeightData.loc[PortfolioDict.pop(kBottomPortfolio)]
                    PortfolioDict[kBottomPortfolio] = kPortfolio / kPortfolio.abs().sum()
            # style-factor portfolios
            for jFactor in self.StyleFactors:
                jFactorData = StyleFactorData[jFactor][iMask]
                ijWeightData = iWeightData[jFactorData>=jFactorData.quantile(0.8)]
                PortfolioDict["%s风格Top%s加权组合" % (jFactor, iWeightFactor)] = ijWeightData / ijWeightData.abs().sum()
                ijWeightData = iWeightData[jFactorData<=jFactorData.quantile(0.2)]
                PortfolioDict["%s风格Bottom%s加权组合" % (jFactor, iWeightFactor)] = ijWeightData / ijWeightData.abs().sum()
            # random portfolios
            for jNum in self.RandomNums:
                PortfolioDict["随机%d%s加权组合" % (jNum, iWeightFactor)] = genRandomPortfolio(ids, target_num=20, weight=iWeightData)
        return PortfolioDict 
Example 70
Project: QuantStudio   Author: Scorpi000   File: PortfolioStrategy.py    GNU General Public License v3.0 4 votes vote down vote up
def trade(self, idt, trading_record, signal):
        if self.TargetAccount is None: return 0
        AccountValue = abs(self.TargetAccount.AccountValue)
        PositionAmount = self.TargetAccount.PositionAmount
        if signal is not None:# a new signal arrived: form a new trade target
            if signal.shape[0]>0:
                signal = signal.loc[PositionAmount.index]
                signal.fillna(0.0, inplace=True)
            else:
                signal = pd.Series(0.0, index=PositionAmount.index)
            if self.TradeTarget=="锁定买卖金额":
                self._TradeTarget = signal * AccountValue - PositionAmount
            elif self.TradeTarget=="锁定目标权重":
                self._TradeTarget = signal
            elif self.TradeTarget=="锁定目标金额":
                self._TradeTarget = signal * AccountValue
            self._SignalExcutePeriod = 0
        elif self._TradeTarget is not None:# no new signal: adjust the trade target based on the trading record
            self._SignalExcutePeriod += 1
            if self._SignalExcutePeriod>=self.SignalValidity:
                self._TradeTarget = None
                self._SignalExcutePeriod = 0
            else:
                iTradingRecord = trading_record[self.TargetAccount.Name]
                if iTradingRecord.shape[0]>0:
                    if self.TradeTarget=="锁定买卖金额":
                        TargetChanged = pd.Series((iTradingRecord["买卖数量"] * iTradingRecord["价格"]).values, index=iTradingRecord["ID"].values)
                        TargetChanged = TargetChanged.groupby(axis=0, level=0).sum().loc[self._TradeTarget.index]
                        TargetChanged.fillna(0.0, inplace=True)
                        TradeTarget = self._TradeTarget - TargetChanged
                        TradeTarget[np.sign(self._TradeTarget)*np.sign(TradeTarget)<0] = 0.0
                        self._TradeTarget = TradeTarget
        # place orders according to the trade target
        if self._TradeTarget is not None:
            if self.TradeTarget=="锁定买卖金额":
                Orders = self._TradeTarget
            elif self.TradeTarget=="锁定目标权重":
                Orders = self._TradeTarget * AccountValue - PositionAmount
            elif self.TradeTarget=="锁定目标金额":
                Orders = self._TradeTarget - PositionAmount
            Orders = Orders / self.TargetAccount.LastPrice
            Orders = Orders[pd.notnull(Orders) & (Orders!=0)]
            if Orders.shape[0]==0: return 0
            Orders = pd.DataFrame(Orders.values, index=Orders.index, columns=["数量"])
            Orders["目标价"] = np.nan
            self.TargetAccount.order(combined_order=Orders)
        return 0
    # allocate weights 
Example 71
Project: QuantStudio   Author: Scorpi000   File: PortfolioStrategy.py    GNU General Public License v3.0 4 votes vote down vote up
def _allocateWeight(self, idt, ids, original_ids, args):
        nID = len(ids)
        if not args.GroupFactors:# no group factors
            if args.WeightFactor=='等权': NewSignal = pd.Series(1/nID, index=ids)
            else:
                WeightData = self._FT.readData(factor_names=[args.WeightFactor], dts=[idt], ids=ids).iloc[0,0,:]
                if args.WeightMiss=='舍弃': WeightData = WeightData[pd.notnull(WeightData)]
                else: WeightData[pd.notnull(WeightData)] = WeightData.mean()
                WeightData = WeightData / WeightData.sum()
                NewSignal = WeightData
        else:
            GroupData = self._FT.readData(factor_names=args.GroupFactors, dts=[idt], ids=original_ids).iloc[:,0,:]
            GroupData[pd.isnull(GroupData)] = np.nan
            AllGroups = [GroupData[iGroup].unique().tolist() for iGroup in args.GroupFactors]
            AllGroups = CartesianProduct(AllGroups)
            nGroup = len(AllGroups)
            if args.GroupWeight=='等权': GroupWeight = pd.Series(np.ones(nGroup)/nGroup, dtype='float')
            else:
                GroupWeight = pd.Series(index=np.arange(nGroup), dtype='float')
                GroupWeightData = self._FT.readData(factor_names=[args.GroupWeight], dts=[idt], ids=original_ids).iloc[0,0,:]
                for i, iGroup in enumerate(AllGroups):
                    if pd.notnull(iGroup[0]): iMask = (GroupData[args.GroupFactors[0]]==iGroup[0])
                    else: iMask = pd.isnull(GroupData[args.GroupFactors[0]])
                    for j, jSubGroup in enumerate(iGroup[1:]):
                        if pd.notnull(jSubGroup): iMask = (iMask & (GroupData[args.GroupFactors[j+1]]==jSubGroup))
                        else: iMask = (iMask & pd.isnull(GroupData[args.GroupFactors[j+1]]))
                    GroupWeight.iloc[i] = GroupWeightData[iMask].sum()
                GroupWeight[pd.isnull(GroupWeight)] = 0
                GroupTotalWeight = GroupWeight.sum()
                if GroupTotalWeight!=0: GroupWeight = GroupWeight/GroupTotalWeight
            if args.WeightFactor=='等权': WeightData = pd.Series(1.0, index=original_ids)
            else: WeightData = self._FT.readData(factor_names=[args.WeightFactor], dts=[idt], ids=original_ids).iloc[0,0,:]
            SelectedGroupData = GroupData.loc[ids]
            NewSignal = pd.Series()
            for i, iGroup in enumerate(AllGroups):
                if pd.notnull(iGroup[0]): iMask = (SelectedGroupData[args.GroupFactors[0]]==iGroup[0])
                else: iMask = pd.isnull(SelectedGroupData[args.GroupFactors[0]])
                for j, jSubGroup in enumerate(iGroup[1:]):
                    if pd.notnull(jSubGroup): iMask = (iMask & (SelectedGroupData[args.GroupFactors[j+1]]==jSubGroup))
                    else: iMask = (iMask & pd.isnull(SelectedGroupData[args.GroupFactors[j+1]]))
                iIDs = SelectedGroupData[iMask].index.tolist()
                if (iIDs==[]) and (args.GroupMiss=='全配'):
                    if pd.notnull(iGroup[0]): iMask = (GroupData[args.GroupFactors[0]]==iGroup[0])
                    else: iMask = pd.isnull(GroupData[args.GroupFactors[0]])
                    for k, kSubClass in enumerate(iGroup[1:]):
                        if pd.notnull(kSubClass): iMask = (iMask & (GroupData[args.GroupFactors[k+1]]==kSubClass))
                        else: iMask = (iMask & pd.isnull(GroupData[args.GroupFactors[k+1]]))
                    iIDs = GroupData[iMask].index.tolist()
                elif (iIDs==[]) and (args.GroupMiss=='忽略'): continue
                iSignal = WeightData.loc[iIDs]
                iSignalWeight = iSignal.sum()
                if iSignalWeight!=0: iSignal = iSignal / iSignalWeight * GroupWeight.iloc[i]
                else: iSignal = iSignal*0.0
                if args.WeightMiss=='填充均值': iSignal[pd.isnull(iSignal)] = iSignal.mean()
                NewSignal = NewSignal.append(iSignal[pd.notnull(iSignal) & (iSignal!=0)])
            NewSignal = NewSignal / NewSignal.sum()
        return NewSignal
    # Cache the signal, and pop signals whose lag period has expired 
Example 72
Project: QuantStudio   Author: Scorpi000   File: PortfolioStrategy.py    GNU General Public License v3.0 4 votes vote down vote up
def _filtrateID(self, idt, ids, args):
        FactorData = self._FT.readData(dts=[idt], ids=ids, factor_names=[args.TargetFactor]).iloc[0,0,:]
        FactorData = FactorData[pd.notnull(FactorData)]
        if args.FactorOrder=='降序': FactorData = -FactorData
        FactorData = FactorData.sort_values(ascending=True)
        if args.FiltrationType=='定比':
            UpLimit = FactorData.quantile(args.FilterUpLimit)
            DownLimit = FactorData.quantile(args.FilterDownLimit)
            NewIDs = FactorData[(FactorData>=DownLimit) & (FactorData<=UpLimit)].index.tolist()
        elif args.FiltrationType=='定量':
            NewIDs = FactorData.iloc[:args.FilterNum].index.tolist()
        elif args.FiltrationType=='定量&定比':
            UpLimit = FactorData.quantile(args.FilterUpLimit)
            DownLimit = FactorData.quantile(args.FilterDownLimit)
            NewIDs = FactorData.iloc[:args.FilterNum].index.intersection(FactorData[(FactorData>=DownLimit) & (FactorData<=UpLimit)].index).tolist()
        if not args.TurnoverBuffer.isBuffer: return NewIDs
        SignalIDs = set(NewIDs)
        nSignalID = len(SignalIDs)
        if args.SignalType=="多头信号":
            if self._AllSignals=={}: LastIDs = set()
            else: LastIDs = set(self._AllSignals[max(self._AllSignals)].index)
        else:
            if self._AllShortSignals=={}: LastIDs = set()
            else: LastIDs = set(self._AllShortSignals[max(self._AllShortSignals)].index)
        if args.FiltrationType=='定比':
            UpLimit = FactorData.quantile(min(1.0, args.FilterUpLimit+args.TurnoverBuffer.FilterUpBuffer))
            DownLimit = FactorData.quantile(max(0.0, args.FilterDownLimit-args.TurnoverBuffer.FilterDownBuffer))
            NewIDs = LastIDs.intersection(FactorData[(FactorData>=DownLimit) & (FactorData<=UpLimit)].index)
        elif args.FiltrationType=='定量':
            NewIDs = LastIDs.intersection(FactorData.iloc[:args.FilterNum+args.TurnoverBuffer.FilterNumBuffer].index)
        elif args.FiltrationType=='定量&定比':
            UpLimit = FactorData.quantile(min(1.0, args.FilterUpLimit+args.TurnoverBuffer.FilterUpBuffer))
            DownLimit = FactorData.quantile(max(0.0, args.FilterDownLimit-args.TurnoverBuffer.FilterDownBuffer))
            NewIDs = LastIDs.intersection(FactorData.iloc[:args.FilterNum+args.TurnoverBuffer.FilterNumBuffer].index).intersection(FactorData[(FactorData>=DownLimit) & (FactorData<=UpLimit)].index)
        if len(NewIDs)>=nSignalID:# the currently held stocks already meet the requirement
            FactorData = FactorData[list(NewIDs)].copy()
            FactorData.sort_values(inplace=True, ascending=True)
            return FactorData.iloc[:nSignalID].index.tolist()
        SignalIDs = list(SignalIDs.difference(NewIDs))
        FactorData = FactorData[SignalIDs].copy()
        FactorData.sort_values(inplace=True, ascending=True)
        return list(NewIDs)+FactorData.iloc[:(nSignalID-len(NewIDs))].index.tolist() 
Example 73
Project: QuantStudio   Author: Scorpi000   File: AbnormalReturn.py    GNU General Public License v3.0 4 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        CurInd = self._AllDTs.index(idt)
        if CurInd<=self.EventPreWindow+self.EstWindow: return 0
        self._Output["事件记录"][:, 2] += 1
        IDs = self._FactorTable.getFilteredID(idt=idt, id_filter_str=self.EventFilter)
        nID, EventWindow = len(IDs), self.EventPreWindow+1+self.EventPostWindow
        if nID>0:
            self._Output["事件记录"] = np.r_[self._Output["事件记录"], np.c_[IDs, [idt]*nID, np.zeros(shape=(nID, 1))]]
            self._Output["正常收益率"] = np.r_[self._Output["正常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常收益率"] = np.r_[self._Output["异常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常协方差"] = np.r_[self._Output["异常协方差"], np.full(shape=(nID, EventWindow, EventWindow), fill_value=np.nan)]
            EstStartInd = CurInd - self.EventPreWindow - self.EstWindow - 1
            Price = self._FactorTable.readData(dts=self._AllDTs[EstStartInd:CurInd+1], ids=IDs, factor_names=[self.PriceFactor]).iloc[0, :, :]
            Return = _calcReturn(Price.values, return_type=self.ReturnType)
            BPrice = self._BenchmarkFT.readData(factor_names=[self.BenchmarkPrice], ids=[self.BenchmarkID], dts=self._AllDTs[EstStartInd:CurInd+1]).iloc[0, :, :]
            BReturn = _calcReturn(BPrice.values, return_type=self.ReturnType).repeat(nID, axis=1)
            EstReturn = Return[:self.EstWindow]
            if self.EstSampleFilter:
                temp = self._FactorTable.readData(dts=self._AllDTs[EstStartInd+1:EstStartInd+self.EstWindow+1], ids=IDs, factor_names=self._FilterFactors)
                FilterMask = eval(self._CompiledIDFilterStr).values
            else:
                FilterMask = np.full(EstReturn.shape, fill_value=True)
            FilterMask = (FilterMask & pd.notnull(EstReturn) & pd.notnull(BReturn[:self.EstWindow]))
            FilterMask = (FilterMask & (np.flipud(np.cumsum(np.flipud(FilterMask), axis=0))<=self.EstSampleLen))
            EstReturn[~FilterMask] = np.nan
            Var = np.nanvar(EstReturn - BReturn[:self.EstWindow], axis=0, ddof=1)
            FilterMask = ((np.sum(FilterMask, axis=0)<self.EstSampleLen) | (Var<1e-6))
            Var[FilterMask] = np.nan
            self._Output["正常收益率"][-nID:, :self.EventPreWindow+1] = BReturn[self.EstWindow:].T
            self._Output["异常收益率"][-nID:, :self.EventPreWindow+1] = (Return[self.EstWindow:] - BReturn[self.EstWindow:]).T
            CovMatrix = np.eye(EventWindow).reshape((1, EventWindow, EventWindow)).repeat(nID, axis=0)
            self._Output["异常协方差"][-nID:, :, :] = (CovMatrix.T*Var).T
        Mask = (self._Output["事件记录"][:, 2]<=self.EventPostWindow)
        if np.sum(Mask)==0: return 0
        IDs = self._Output["事件记录"][:, 0][Mask]
        RowPos, ColPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist(), (self._Output["事件记录"][Mask, 2]+self.EventPreWindow).astype(np.int)
        BPrice = self._BenchmarkFT.readData(factor_names=[self.BenchmarkPrice], ids=[self.BenchmarkID], dts=[self._AllDTs[CurInd-1], idt]).iloc[0, :, 0]
        BReturn = _calcReturn(BPrice.values, return_type=self.ReturnType).repeat(len(IDs), axis=0)
        self._Output["正常收益率"][RowPos, ColPos] = BReturn
        Price = self._FactorTable.readData(dts=[self._AllDTs[CurInd-1], idt], ids=sorted(set(IDs)), factor_names=[self.PriceFactor]).iloc[0, :, :].loc[:, IDs]
        self._Output["异常收益率"][RowPos, ColPos] = (_calcReturn(Price.values, return_type=self.ReturnType)[0] - BReturn)
        return 0 
Example 74
Project: QuantStudio   Author: Scorpi000   File: ReturnDecomposition.py    GNU General Public License v3.0 4 votes vote down vote up
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        FactorNames = list(self.TestFactors)
        self._Output["Pure Return"] = pd.DataFrame(self._Output["Pure Return"], index=self._Output["时点"], columns=FactorNames)
        self._Output["Raw Return"] = pd.DataFrame(self._Output["Raw Return"], index=self._Output["时点"], columns=FactorNames)
        self._Output["滚动t统计量_Pure"] = pd.DataFrame(np.nan, index=self._Output["时点"], columns=FactorNames)
        self._Output["滚动t统计量_Raw"] = pd.DataFrame(np.nan, index=self._Output["时点"], columns=FactorNames)
        self._Output["回归t统计量(Raw Return)"] = pd.DataFrame(self._Output["回归t统计量(Raw Return)"], index=self._Output["时点"], columns=FactorNames)
        self._Output["回归t统计量(Pure Return)"] = pd.DataFrame(self._Output["回归t统计量(Pure Return)"], index=self._Output["时点"], columns=FactorNames)
        self._Output["回归F统计量"] = pd.DataFrame(self._Output["回归F统计量"], index=self._Output["时点"], columns=FactorNames+["所有因子"])
        self._Output["回归R平方"] = pd.DataFrame(self._Output["回归R平方"], index=self._Output["时点"], columns=FactorNames+["所有因子"])
        self._Output["回归调整R平方"] = pd.DataFrame(self._Output["回归调整R平方"], index=self._Output["时点"], columns=FactorNames+["所有因子"])
        nDT = self._Output["Raw Return"].shape[0]
        # calculate rolling t-statistics
        for i in range(nDT):
            if i<self.RollAvgPeriod-1: continue
            iReturn = self._Output["Pure Return"].iloc[i-self.RollAvgPeriod+1:i+1, :]
            self._Output["滚动t统计量_Pure"].iloc[i] = iReturn.mean(axis=0) / iReturn.std(axis=0) * pd.notnull(iReturn).sum(axis=0)**0.5
            iReturn = self._Output["Raw Return"].iloc[i-self.RollAvgPeriod+1:i+1, :]
            self._Output["滚动t统计量_Raw"].iloc[i] = iReturn.mean(axis=0) / iReturn.std(axis=0) * pd.notnull(iReturn).sum(axis=0)**0.5
        nYear = (self._Output["时点"][-1] - self._Output["时点"][0]).days / 365
        self._Output["统计数据"] = pd.DataFrame(index=self._Output["Pure Return"].columns)
        self._Output["统计数据"]["年化收益率(Pure)"] = ((1 + self._Output["Pure Return"]).prod())**(1/nYear) - 1
        self._Output["统计数据"]["跟踪误差(Pure)"] = self._Output["Pure Return"].std() * np.sqrt(nDT/nYear)
        self._Output["统计数据"]["信息比率(Pure)"] = self._Output["统计数据"]["年化收益率(Pure)"] / self._Output["统计数据"]["跟踪误差(Pure)"]
        self._Output["统计数据"]["胜率(Pure)"] = (self._Output["Pure Return"]>0).sum() / nDT
        self._Output["统计数据"]["t统计量(Pure)"] = self._Output["Pure Return"].mean() / self._Output["Pure Return"].std() * np.sqrt(nDT)
        self._Output["统计数据"]["年化收益率(Raw)"] = (1 + self._Output["Raw Return"]).prod()**(1/nYear) - 1
        self._Output["统计数据"]["跟踪误差(Raw)"] = self._Output["Raw Return"].std() * np.sqrt(nDT/nYear)
        self._Output["统计数据"]["信息比率(Raw)"] = self._Output["统计数据"]["年化收益率(Raw)"] / self._Output["统计数据"]["跟踪误差(Raw)"]
        self._Output["统计数据"]["胜率(Raw)"] = (self._Output["Raw Return"]>0).sum() / nDT
        self._Output["统计数据"]["t统计量(Raw)"] = self._Output["Raw Return"].mean() / self._Output["Raw Return"].std() * np.sqrt(nDT)
        self._Output["统计数据"]["年化收益率(Pure-Naive)"] = (1 + self._Output["Pure Return"] - self._Output["Raw Return"]).prod()**(1/nYear) - 1
        self._Output["统计数据"]["跟踪误差(Pure-Naive)"] = (self._Output["Pure Return"] - self._Output["Raw Return"]).std() * np.sqrt(nDT/nYear)
        self._Output["统计数据"]["信息比率(Pure-Naive)"] = self._Output["统计数据"]["年化收益率(Pure-Naive)"] / self._Output["统计数据"]["跟踪误差(Pure-Naive)"]
        self._Output["统计数据"]["胜率(Pure-Naive)"] = (self._Output["Pure Return"] - self._Output["Raw Return"]>0).sum() / nDT
        self._Output["统计数据"]["t统计量(Pure-Naive)"] = (self._Output["Pure Return"] - self._Output["Raw Return"]).mean() / (self._Output["Pure Return"] - self._Output["Raw Return"]).std() * np.sqrt(nDT)
        self._Output["回归统计量均值"] = pd.DataFrame(index=FactorNames+["所有因子"])
        self._Output["回归统计量均值"]["t统计量(Raw Return)"] = self._Output["回归t统计量(Raw Return)"].mean()
        self._Output["回归统计量均值"]["t统计量(Pure Return)"] = self._Output["回归t统计量(Pure Return)"].mean()
        self._Output["回归统计量均值"]["F统计量"] = self._Output["回归F统计量"].mean()
        self._Output["回归统计量均值"]["R平方"] = self._Output["回归R平方"].mean()
        self._Output["回归统计量均值"]["调整R平方"] = self._Output["回归调整R平方"].mean()
        self._Output.pop("时点")
        return 0 
Example 75
Project: QuantStudio   Author: Scorpi000   File: IC.py    GNU General Public License v3.0 4 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
            PreInd = self._CurCalcInd - self.LookBack
            LastInd = self._CurCalcInd - 1
            PreDateTime = self.CalcDTs[PreInd]
            LastDateTime = self.CalcDTs[LastInd]
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
            PreInd = self._CurCalcInd - self.LookBack
            LastInd = self._CurCalcInd - 1
            PreDateTime = self._Model.DateTimeSeries[PreInd]
            LastDateTime = self._Model.DateTimeSeries[LastInd]
        if (PreInd<0) or (LastInd<0):
            for iFactorName in self.TestFactors:
                self._Output["IC"][iFactorName].append(np.nan)
                self._Output["股票数"][iFactorName].append(np.nan)
            self._Output["时点"].append(idt)
            return 0
        PreIDs = self._FactorTable.getFilteredID(idt=PreDateTime, id_filter_str=self.IDFilter)
        FactorExpose = self._FactorTable.readData(dts=[PreDateTime], ids=PreIDs, factor_names=list(self.TestFactors)).iloc[:, 0, :]
        Price = self._FactorTable.readData(dts=[LastDateTime, idt], ids=PreIDs, factor_names=[self.PriceFactor]).iloc[0, :, :]
        Ret = Price.iloc[-1] / Price.iloc[0] - 1
        if self.IndustryFactor!="无":# industry-adjust the returns
            IndustryData = self._FactorTable.readData(dts=[LastDateTime], ids=PreIDs, factor_names=[self.IndustryFactor]).iloc[0, 0, :]
            AllIndustry = IndustryData.unique()
            if self.WeightFactor=="等权":
                for iIndustry in AllIndustry:
                    iMask = (IndustryData==iIndustry)
                    Ret[iMask] -= Ret[iMask].mean()
            else:
                WeightData = self._FactorTable.readData(dts=[LastDateTime], ids=PreIDs, factor_names=[self.WeightFactor]).iloc[0, 0, :]
                for iIndustry in AllIndustry:
                    iMask = (IndustryData==iIndustry)
                    iWeight = WeightData[iMask]
                    iRet = Ret[iMask]
                    Ret[iMask] -= (iRet*iWeight).sum() / iWeight[pd.notnull(iWeight) & pd.notnull(iRet)].sum(skipna=False)
        for iFactorName in self.TestFactors:
            self._Output["IC"][iFactorName].append(FactorExpose[iFactorName].corr(Ret, method=self.CorrMethod))
            self._Output["股票数"][iFactorName].append(pd.notnull(FactorExpose[iFactorName]).sum())
        self._Output["时点"].append(idt)
        return 0 
Example 76
Project: QuantStudio   Author: Scorpi000   File: IC.py    GNU General Public License v3.0 4 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
            LastInd = self._CurCalcInd - 1
            LastDateTime = self.CalcDTs[LastInd]
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
            LastInd = self._CurCalcInd - 1
            LastDateTime = self._Model.DateTimeSeries[LastInd]
        if (LastInd<0):
            for i, iRollBack in enumerate(self.LookBack):
                self._Output["IC"][i].append(np.nan)
            self._Output["时点"].append(idt)
            return 0
        Price = self._FactorTable.readData(dts=[LastDateTime, idt], ids=self._FactorTable.getID(ifactor_name=self.PriceFactor), factor_names=[self.PriceFactor]).iloc[0]
        Ret = Price.iloc[1] / Price.iloc[0] - 1
        for i, iRollBack in enumerate(self.LookBack):
            iPreInd = self._CurCalcInd - iRollBack
            if iPreInd<0:
                self._Output["IC"][i].append(np.nan)
                continue
            iPreDT = self.CalcDTs[iPreInd]
            iPreIDs = self._FactorTable.getFilteredID(idt=iPreDT, id_filter_str=self.IDFilter)
            iRet = Ret.loc[iPreIDs].copy()
            if self.IndustryFactor!="无":
                IndustryData = self._FactorTable.readData(dts=[iPreDT], ids=iPreIDs, factor_names=[self.IndustryFactor]).iloc[0,0,:]
                AllIndustry = IndustryData.unique()
                # industry-adjust the returns
                if self.WeightFactor=="等权":  # "等权" = equal weighting
                    for iIndustry in AllIndustry:
                        iRet[IndustryData==iIndustry] -= iRet[IndustryData==iIndustry].mean()
                else:
                    WeightData = self._FactorTable.readData(dts=[iPreDT], ids=iPreIDs, factor_names=[self.WeightFactor]).iloc[0,0,:]
                    for iIndustry in AllIndustry:
                        iWeight = WeightData[IndustryData==iIndustry]
                        iiRet = iRet[IndustryData==iIndustry]
                        iRet[IndustryData==iIndustry] -= (iiRet * iWeight).sum() / iWeight[pd.notnull(iWeight) & pd.notnull(iiRet)].sum(skipna=False)
            iFactorExpose = self._FactorTable.readData(dts=[iPreDT], ids=iPreIDs, factor_names=[self.TestFactor]).iloc[0,0,:]
            self._Output["IC"][i].append(iFactorExpose.corr(iRet, method=self.CorrMethod))
        self._Output["时点"].append(idt)
        return 0 
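In the industry adjustment above, the weighted demeaning divides by `iWeight[pd.notnull(iWeight) & pd.notnull(iiRet)].sum(skipna=False)` rather than by the full weight sum, so that stocks missing either a weight or a return do not enter the denominator. A minimal, self-contained sketch of that masking pattern (the series values and labels here are invented for illustration):

import numpy as np
import pandas as pd

ret = pd.Series([0.02, np.nan, -0.01, 0.03], index=["A", "B", "C", "D"])
weight = pd.Series([1.0, 2.0, np.nan, 3.0], index=["A", "B", "C", "D"])

# (ret * weight).sum() silently drops any pair containing a NaN, so the
# denominator must be restricted to the same pairs to keep the mean unbiased.
valid = pd.notnull(ret) & pd.notnull(weight)
weighted_mean = (ret * weight).sum() / weight[valid].sum()
print(round(weighted_mean, 4))  # 0.0275 == (0.02*1.0 + 0.03*3.0) / (1.0 + 3.0)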
Example 77
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 4 votes vote down vote up
def prepareRegressData(Y, X=None, x_varnames=None, has_constant=False, dummy_data=None, drop_dummy_na=False):
    NotNAMask = pd.notnull(Y)
    if X is None:
        if (dummy_data is None) and (not has_constant):
            return (NotNAMask, [], Y[NotNAMask], X)  # X is None on this branch, so return it as-is
        x_varnames = []
    else:
        if np.ndim(X)>1:
            NotNAMask = ((np.sum(pd.isnull(X),axis=1)==0) & NotNAMask)
        else:
            NotNAMask = (pd.notnull(X) & NotNAMask)
            X = X.reshape((X.shape[0],1))
        if x_varnames is None:
            x_varnames = ["x_"+str(i) for i in range(X.shape[1])]
    # expand the dummy (categorical) variables into indicator columns
    if dummy_data is not None:
        if np.ndim(dummy_data)==1:
            dummy_data = dummy_data.reshape((dummy_data.shape[0],1))
        if drop_dummy_na:
            NotNAMask = (NotNAMask & (np.sum(pd.isnull(dummy_data),axis=1)==0))
        else:
            dummy_data[pd.isnull(dummy_data)] = np.nan
        dummy_data = dummy_data[NotNAMask]
        if X is not None:
            X = X[NotNAMask]
        Y = Y[NotNAMask]
        for i in range(dummy_data.shape[1]):
            AllCats = pd.unique(dummy_data[:,i])
            if (has_constant) or (i>0):
                AllCats = AllCats[:-1]  # drop one category to avoid perfect collinearity (dummy-variable trap)
            if AllCats.shape[0]==0:
                continue
            iX = np.zeros((dummy_data.shape[0],AllCats.shape[0]))
            for j,jCat in enumerate(AllCats):
                if pd.isnull(jCat):
                    iX[pd.isnull(dummy_data[:,i]),j] = 1.0
                else:
                    iX[dummy_data[:,i]==jCat,j] = 1.0
            if X is not None:
                X = np.hstack((X,iX))
            else:
                X = iX
            x_varnames += list(AllCats)
    else:
        if X is not None:
            X = X[NotNAMask]
        Y = Y[NotNAMask]
    if has_constant:
        if X is None:
            X = np.ones((Y.shape[0],1))
        elif X.shape[0]>0:
            X = sm.add_constant(X, prepend=True)
        else:
            X = X.reshape((0,X.shape[1]+1))
        x_varnames = ["constant"]+x_varnames
    return (NotNAMask, x_varnames, Y, X)
# Z-Score standardization
# data: array of values to standardize; cat_data: array of category labels
# avg_statistics: central statistic, one of "平均值" (mean) or "中位数" (median); dispersion_statistics: dispersion statistic, one of "标准差" (standard deviation) or "MAD"
# avg_weight: weights used for the central statistic; dispersion_weight: weights used for the dispersion statistic
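For context, a minimal call of `prepareRegressData` from Example 77 above on plain NumPy arrays might look like the sketch below. The variable names are invented, and the source module is assumed to import numpy as np, pandas as pd and statsmodels.api as sm.

import numpy as np
# prepareRegressData as defined in Example 77; its module is assumed to
# import pandas as pd and statsmodels.api as sm.

Y = np.array([1.0, 2.0, np.nan, 4.0])   # dependent variable with a missing value
X = np.array([0.5, np.nan, 1.5, 2.0])   # single regressor, also with a missing value

mask, names, y, x = prepareRegressData(Y, X, has_constant=True)
print(mask)     # [ True False False  True] -- rows where both Y and X are non-null
print(names)    # ['constant', 'x_0']
print(x.shape)  # (2, 2): constant column prepended by sm.add_constant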
Example 78
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 4 votes vote down vote up
def standardizeZScore(data, mask=None, cat_data=None, avg_statistics="平均值", dispersion_statistics="标准差", avg_weight=None, dispersion_weight=None, other_handle='填充None'):
    """Z-Score 标准化"""
    StdData = np.empty(data.shape,dtype='float')+np.nan
    if mask is None:
        mask = pd.isnull(StdData)  # StdData is all-NaN, so this yields an all-True mask (every sample participates)
    nData = data.shape[0]
    if (avg_statistics!='平均值') or (avg_weight is None):
        avg_weight = np.ones(nData)/nData
    AvgWeightInd = pd.notnull(avg_weight)
    if (dispersion_statistics!='标准差') or (dispersion_weight is None):
        dispersion_weight = np.ones(nData)/nData
    StdWeightInd = pd.notnull(dispersion_weight)
    CatMasks = maskCategary(nData,cat_data=cat_data,mask=mask)
    for jCat,jCatMask in CatMasks.items():
        jTotalMask = (pd.notnull(data) & AvgWeightInd & jCatMask)
        if avg_statistics=='平均值':
            TotalWeight = np.nansum(avg_weight[jTotalMask])
            if TotalWeight!=0:
                jAvg = np.nansum(data[jTotalMask]*avg_weight[jTotalMask])/TotalWeight
            else:
                StdData[jTotalMask] = np.nan
                continue
        elif avg_statistics=='中位数':
            jAvg = np.nanmedian(data[jTotalMask])
        jTotalMask = (pd.notnull(data) & StdWeightInd & jCatMask)
        if dispersion_statistics=='标准差':
            TotalWeight = np.nansum(dispersion_weight[jTotalMask])
            if TotalWeight!=0:
                jStd = np.sqrt(np.nansum(((data[jTotalMask]-np.nanmean(data[jTotalMask]))**2*dispersion_weight[jTotalMask]/TotalWeight)))
            else:
                StdData[jTotalMask] = np.nan
                continue
        elif dispersion_statistics=='MAD':
            jStd = 1.483*np.nanmedian(np.abs(data[jTotalMask]-np.nanmedian(data[jTotalMask])))
        if jStd!=0:
            StdData[jTotalMask] = (data[jTotalMask]-jAvg)/jStd
        else:
            StdData[jTotalMask] = 0.0
    if other_handle=="保持不变":
        StdData[~mask] = data[~mask]
    return StdData

# Rank standardization
# data: array of values to standardize; cat_data: array of category labels
# ascending: whether to rank in ascending order (True/False); uniformization: whether to normalize the ranks
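A minimal usage sketch for `standardizeZScore` from Example 78, assuming `maskCategary` from the same module (DataPreprocessingFun.py) is importable; the factor values and industry labels below are invented.

import numpy as np
# standardizeZScore as defined in Example 78; it relies on maskCategary
# from the same module, which is assumed to be available.

factor = np.array([1.0, 2.0, np.nan, 4.0, 8.0])
industry = np.array(["A", "A", "B", "B", "B"], dtype="O")

# equal-weighted z-scores computed within each industry group;
# the NaN entry is expected to stay NaN in the output
z = standardizeZScore(factor, cat_data=industry)
print(z)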
Example 79
Project: QuantStudio   Author: Scorpi000   File: ArcticDB.py    GNU General Public License v3.0 4 votes vote down vote up
def writeData(self, data, table_name, if_exists="update", data_type={}, **kwargs):
        if data.shape[0]==0: return 0
        if table_name not in self._Arctic.list_libraries(): return self._writeNewData(data, table_name, data_type=data_type)
        Lib = self._Arctic[table_name]
        DataCols = [str(i) for i in range(data.shape[0])]
        #DTRange = pd.date_range(data.major_axis[0], data.major_axis[-1], freq=Freq)
        DTRange = data.major_axis
        OverWrite = (if_exists=="update")
        for i, iID in enumerate(data.minor_axis):
            iData = data.iloc[:, :, i]
            if not Lib.has_symbol(iID):
                iMetaData = {"FactorNames":iData.columns.tolist(), "Cols":DataCols}
                iData.index.name, iData.columns = "date", DataCols
                Lib.write(iID, iData, metadata=iMetaData)
                continue
            iMetaData = Lib.read_metadata(symbol=iID)
            iOldFactorNames, iCols = iMetaData["FactorNames"], iMetaData["Cols"]
            iNewFactorNames = iData.columns.difference(iOldFactorNames).tolist()
            #iCrossFactorNames = iOldFactorNames.intersection(iData.columns).tolist()
            iOldData = Lib.read(symbol=iID, chunk_range=DTRange, filter_data=True)
            if iOldData.shape[0]>0:
                iOldData.columns = iOldFactorNames
                iOldData = iOldData.loc[iOldData.index.union(iData.index), iOldFactorNames+iNewFactorNames]
                iOldData.update(iData, overwrite=OverWrite)
            else:
                iOldData = iData.loc[:, iOldFactorNames+iNewFactorNames]
            if iNewFactorNames:
                iCols += [str(i) for i in range(iOldData.shape[1], iOldData.shape[1]+len(iNewFactorNames))]
                #iOldData = pd.merge(iOldData, iData.loc[:, iNewFactorNames], how="outer", left_index=True, right_index=True)
            #if iCrossFactorNames:
                #iOldData = iOldData.loc[iOldData.index.union(iData.index), :]
                #iOldData.update(iData, overwrite=OverWrite)
                #if if_exists=="update": iOldData.loc[iData.index, iCrossFactorNames] = iData.loc[:, iCrossFactorNames]
                #else: iOldData.loc[iData.index, iCrossFactorNames] = iOldData.loc[iData.index, iCrossFactorNames].where(pd.notnull(iOldData.loc[iData.index, iCrossFactorNames]), iData.loc[:, iCrossFactorNames])
            iOldData.index.name, iOldData.columns ="date", iCols
            iMetaData["FactorNames"], iMetaData["Cols"] = iOldFactorNames+iNewFactorNames, iCols
            Lib.update(iID, iOldData, metadata=iMetaData, chunk_range=DTRange)
        FactorInfo = Lib.read(symbol="_FactorInfo").set_index("FactorName")
        NewFactorNames = data.items.difference(FactorInfo.index).tolist()
        FactorInfo = FactorInfo.loc[FactorInfo.index.tolist()+NewFactorNames, :]
        for iFactorName in NewFactorNames:
            if iFactorName in data_type: FactorInfo.loc[iFactorName, "DataType"] = data_type[iFactorName]
            elif np.dtype('O') in data.loc[iFactorName].dtypes: FactorInfo.loc[iFactorName, "DataType"] = "string"
            else: FactorInfo.loc[iFactorName, "DataType"] = "double"
        Lib.write("_FactorInfo", FactorInfo.reset_index(), chunker=arctic.chunkstore.passthrough_chunker.PassthroughChunker())
        return 0 
Example 80
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 4 votes vote down vote up
def _adjustRawDataByRelatedField(self, raw_data, fields):
        RelatedFields = self._FactorInfo["RelatedSQL"].loc[fields]
        RelatedFields = RelatedFields[pd.notnull(RelatedFields)]
        if RelatedFields.shape[0]==0: return raw_data
        for iField in RelatedFields.index:
            iOldData = raw_data.pop(iField)
            iDataType = _identifyDataType(self._FactorInfo.loc[iField, "DataType"])
            if iDataType=="double":
                iNewData = pd.Series(np.nan, index=raw_data.index, dtype="float")
            else:
                iNewData = pd.Series(np.full(shape=(raw_data.shape[0], ), fill_value=None, dtype="O"), index=raw_data.index, dtype="O")
            iSQLStr = self._FactorInfo.loc[iField, "RelatedSQL"]
            if iSQLStr[0]=="{":
                iMapInfo = eval(iSQLStr).items()
            else:
                iStartIdx = iSQLStr.find("{KeyCondition}")
                if iStartIdx!=-1:
                    iEndIdx = iSQLStr[iStartIdx:].find(" ")
                    if iEndIdx==-1: iEndIdx = len(iSQLStr)
                    else: iEndIdx += iStartIdx
                    iStartIdx += 14
                    KeyField = iSQLStr[iStartIdx:iEndIdx]
                    iOldDataType = _identifyDataType(self._FactorInfo.loc[iField[:-2], "DataType"])
                    iKeys = iOldData[pd.notnull(iOldData)].unique().tolist()
                    if iKeys:
                        KeyCondition = genSQLInCondition(KeyField, iKeys, is_str=(iOldDataType!="double"))
                    else:
                        KeyCondition = KeyField+" IN (NULL)"
                    iSQLStr = iSQLStr.replace("{KeyCondition}"+KeyField, "{KeyCondition}")
                else:
                    KeyCondition = ""
                if iSQLStr.find("{Keys}")!=-1:
                    Keys = ", ".join([str(iKey) for iKey in iOldData[pd.notnull(iOldData)].unique()])
                    if not Keys: Keys = "NULL"
                else:
                    Keys = ""
                if iSQLStr.find("{SecuCode}")!=-1:
                    SecuCode = self._getSecuMainIDField()
                else:
                    SecuCode = ""
                iMapInfo = self._FactorDB.fetchall(iSQLStr.format(TablePrefix=self._FactorDB.TablePrefix, Keys=Keys, KeyCondition=KeyCondition, SecuCode=SecuCode))
            for jVal, jRelatedVal in iMapInfo:
                if pd.notnull(jVal):
                    iNewData[iOldData==jVal] = jRelatedVal
                else:
                    iNewData[pd.isnull(iOldData)] = jRelatedVal
            raw_data[iField] = iNewData
        return raw_data