Python pandas.notnull() Examples

The following are code examples for showing how to use pandas.notnull(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: gullikson-scripts   Author: kgullikson88   File: Sensitivity.py    MIT License 6 votes vote down vote up
def get_sec_spt(row):
    """
    Work out the spectral type of the secondary for one table row.

    Intended to be used through the `apply` method of a pandas DataFrame.
    The following sources are tried in order:

    1. the catalogued secondary type ``Sp2``, when it is not null;
    2. ``Sp1`` together with ``mag1`` and ``mag2``: the primary's absolute
       magnitude gives the offset between apparent and absolute magnitude,
       which is then applied to the secondary;
    3. ``Sp1`` together with ``K1`` and ``K2``: the interpolated primary mass
       is scaled by ``K1 / K2``.

    Raises ValueError (after printing the row) when none of them is usable.
    """
    if pd.notnull(row['Sp2']):
        return row['Sp2']

    if pd.notnull(row['Sp1']) and pd.notnull(row['mag1']) and pd.notnull(row['mag2']):
        # TODO: Do better than assuming V band!
        band = 'V'
        primary_absmag = MS.GetAbsoluteMagnitude(row['Sp1'], color=band)
        magnitude_offset = float(row['mag1']) - primary_absmag
        secondary_absmag = float(row['mag2']) - magnitude_offset
        return MS.GetSpectralType_FromAbsMag(secondary_absmag, color=band)[0]

    if pd.notnull(row['Sp1']) and pd.notnull(row['K1']) and pd.notnull(row['K2']):
        primary_mass = MS.Interpolate('mass', row['Sp1'])
        ratio = float(row['K1']) / float(row['K2'])
        return MS.GetSpectralType('mass', ratio * primary_mass)[0]

    # Nothing usable: show the offending row before failing.
    print(row)
    raise ValueError('Must give enough information to figure out the spectral type!')
Example 2
Project: matchminer-engine   Author: dfci   File: utilities.py    GNU Affero General Public License v3.0 6 votes vote down vote up
def add_matches(trial_matches_df, db):
    """Add the match table to the database, replacing what already exists there.

    The id columns (``clinical_id``, ``genomic_id``) are converted to strings
    and non-null ``report_date`` values are formatted as ``'%Y-%m-%d %X'``;
    these conversions are written back into ``trial_matches_df``. When the
    frame has at least one row, ``db.trial_match`` is dropped and the rows are
    inserted in batches of 1000.
    """
    present = trial_matches_df.columns

    for id_column in ('clinical_id', 'genomic_id'):
        if id_column in present:
            trial_matches_df[id_column] = trial_matches_df[id_column].apply(str)

    def _format_date(value):
        # leave missing dates untouched
        if pd.notnull(value):
            return dt.datetime.strftime(value, '%Y-%m-%d %X')
        return value

    if 'report_date' in present:
        trial_matches_df['report_date'] = trial_matches_df['report_date'].apply(_format_date)

    if len(trial_matches_df.index) == 0:
        return

    db.trial_match.drop()
    batch_size = 1000
    for start in range(0, trial_matches_df.shape[0], batch_size):
        batch = trial_matches_df[start:start + batch_size]
        # transpose so that to_json() yields one JSON object per original row
        records = json.loads(batch.T.to_json()).values()
        db.trial_match.insert_many(records)
Example 3
Project: lifestyles   Author: CamDavidsonPilon   File: cbc_hb.py    MIT License 6 votes vote down vote up
def _create_observation_variable(individual_selections, choices, partsworth):
    """
    Create the PyMC3 observation variable for one individual, skipping missing answers.

    `individual_selections` is a Series of the selections the individual made, starting from 0.
    It can contain NaNs, which mean that no answer was provided.

    `choices` is a DataFrame with a hierarchical index: level=0 enumerates the choices, and
    level=1 displays the profile at a specific choice.
    Its size is (n_questions, n_choices_per_question).

    `partsworth` is a slice of a PyMC3 matrix. It represents the partsworth variables of an
    individual. Size is (n_profiles,)

    This computes the values exp(partsworth * profile_j) / sum[ exp(partsworth * profile_k) ] for all j.
    """
    answered = pd.notnull(individual_selections).values

    # one vector of scores per choice, restricted to the questions that were answered
    scores = [
        tt.dot(profile.values, partsworth)
        for _, profile in choices[answered].groupby(axis=1, level=0)
    ]
    probabilities = tt.nnet.softmax(tt.stack(scores, axis=0).T)

    return pm.Categorical("Obs_%s" % individual_selections.name,
                          probabilities,
                          observed=individual_selections[answered].values)
Example 4
Project: recordlinkage   Author: J535D165   File: utils.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fillna(series_or_arr, missing_value=0.0):
    """Fill missing values in pandas objects and numpy arrays.

    The input is modified in place and also returned. Nothing is changed
    when ``missing_value`` itself is null (None/NaN).

    Arguments
    ---------
    series_or_arr : pandas.Series, numpy.ndarray
        The numpy array or pandas series for which the missing values
        need to be replaced.
    missing_value : float, int, str
        The value to replace the missing value with. Default 0.0.

    Returns
    -------
    pandas.Series, numpy.ndarray
        The numpy array or pandas series with the missing values
        filled.
    """

    if pandas.notnull(missing_value):
        if isinstance(series_or_arr, numpy.ndarray):
            # pandas.isnull also handles object/string arrays (None, NaN),
            # where numpy.isnan raises TypeError.
            series_or_arr[pandas.isnull(series_or_arr)] = missing_value
        else:
            series_or_arr.fillna(missing_value, inplace=True)

    return series_or_arr
Example 5
Project: QuantStudio   Author: Scorpi000   File: AbnormalReturn.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_end__(self):
        """Finalise the event-study output once the run has ended.

        Returns 0 immediately when the run was never started. Otherwise, for
        every event whose third record column is ``<= self.EventPostWindow``,
        the matching block of ``self._Output["异常协方差"]`` is filled with
        ``(I + X (X'X)^-1 X') * Var``, where X holds the non-missing market
        excess returns with a constant column prepended. Alpha/Beta/Var are
        then packed into the ``"回归估计量"`` DataFrame, indexed by (ID, 时点),
        and the market excess return buffer is dropped.
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        Mask = (self._Output["事件记录"][:, 2]<=self.EventPostWindow)
        if np.sum(Mask)>0:
            # Iterate over the selected row positions directly: the previous code
            # converted them to a list and then read ``.shape``, which raised
            # AttributeError. The unused ``ColPos`` local (built with the removed
            # ``np.int`` alias) is gone.
            RowPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist()
            for iRow in RowPos:
                X = self._Output["市场超额收益率"][iRow, :]
                iMask = pd.notnull(X)
                X = sm.add_constant(X[iMask], prepend=True)
                iCov = (np.eye(X.shape[0])+np.dot(np.dot(X, np.linalg.inv(np.dot(X.T, X))), X.T)) * self._Output["Var"][iRow]
                # np.ix_ addresses the (valid x valid) sub-matrix; indexing with
                # [iMask, iMask] pairs the masks element-wise and selects only the diagonal.
                # NOTE(review): assumes "异常协方差" is laid out as (event, time, time) - confirm.
                self._Output["异常协方差"][iRow][np.ix_(iMask, iMask)] = iCov
        Index = pd.MultiIndex.from_arrays(self._Output["事件记录"][:,:2].T, names=["ID", "时点"])
        # NOTE(review): "Apha" looks like a typo for "Alpha"; kept because callers may read this column name.
        self._Output["回归估计量"] = pd.DataFrame(self._Output.pop("Alpha"), index=Index, columns=["Apha"])
        self._Output["回归估计量"]["Beta"] = self._Output.pop("Beta")
        self._Output["回归估计量"]["Sigma2"] = self._Output.pop("Var")
        self._Output.pop("市场超额收益率")
        return 0
Example 6
Project: QuantStudio   Author: Scorpi000   File: IC.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_end__(self):
        """Turn the accumulated IC results into DataFrames plus summary statistics.

        Returns 0 immediately when the run was never started. Otherwise:
        the stock counts and IC values are wrapped in DataFrames indexed by the
        recorded time points; the IC of every factor whose order is "升序" is
        negated; a rolling mean of the IC over ``self.RollAvgPeriod`` rows is
        stored (NaN until enough rows exist); and a per-factor statistics table
        is written to ``self._Output["统计数据"]``.
        """
        if not self._isStarted:
            return 0
        super().__QS_end__()
        Output = self._Output
        DTs = Output.pop("时点")
        Output["股票数"] = pd.DataFrame(Output["股票数"], index=DTs)
        Output["IC"] = pd.DataFrame(Output["IC"], index=DTs)
        IC = Output["IC"]
        # flip the sign for factors flagged with the "升序" order
        for iFactorName in self.TestFactors:
            if self.FactorOrder[iFactorName]=="升序":
                IC[iFactorName] = -IC[iFactorName]
        Output["IC的移动平均"] = IC.copy()
        RollingIC = Output["IC的移动平均"]
        for iRow in range(len(DTs)):
            if iRow>=self.RollAvgPeriod-1:
                RollingIC.iloc[iRow,:] = IC.iloc[iRow-self.RollAvgPeriod+1:iRow+1, :].mean()
            else:
                # not enough history yet for a full window
                RollingIC.iloc[iRow,:] = np.nan
        Output["统计数据"] = pd.DataFrame(index=IC.columns)
        Stats = Output["统计数据"]
        Stats["平均值"] = IC.mean()
        Stats["标准差"] = IC.std()
        Stats["最小值"] = IC.min()
        Stats["最大值"] = IC.max()
        Stats["IC_IR"] = Stats["平均值"] / Stats["标准差"]
        # placeholder so the column keeps this position; filled in at the end
        Stats["t统计量"] = np.nan
        Stats["平均股票数"] = Output["股票数"].mean()
        Stats["IC×Sqrt(N)"] = Stats["平均值"]*np.sqrt(Stats["平均股票数"])
        Stats["有效期数"] = 0.0
        for iFactor in IC:
            Stats.loc[iFactor,"有效期数"] = pd.notnull(IC[iFactor]).sum()
        Stats["t统计量"] = (Stats["有效期数"]**0.5)*Stats["IC_IR"]
        return 0
Example 7
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 6 votes vote down vote up
def maskCategary(data_len,cat_data=None, mask=None):
    """Build one boolean row mask per combination of category values.

    data_len: number of rows; used when ``mask`` or ``cat_data`` is not given.
    cat_data: optional array of category labels (1-D, or 2-D with one column
        per category variable). Null entries are overwritten with NaN in place.
    mask: optional boolean array restricting the rows considered; defaults to
        all rows.

    Returns a dict mapping each category combination (a tuple) to the boolean
    mask of rows that lie in ``mask`` and match it; a NaN component matches
    rows whose value is null. Without ``cat_data`` the only key is ``(nan,)``.
    """
    if mask is None:
        mask = np.ones((data_len,), dtype=bool)
    if cat_data is None:
        AllCats = [(np.nan,)]
        cat_data = np.full((data_len,1), np.nan, dtype='float')
    else:
        cat_data[pd.isnull(cat_data)] = np.nan
        if cat_data.ndim==1:
            cat_data = cat_data.reshape((cat_data.shape[0],1))
        # distinct values per column, then every combination of them
        ColumnCats = [list(pd.unique(cat_data[mask,j])) for j in range(cat_data.shape[1])]
        AllCats = CartesianProduct(ColumnCats)
    CatMask = {}
    for iCat in AllCats:
        iMask = mask
        for j,jSubCat in enumerate(iCat):
            jColumn = cat_data[:,j]
            if pd.notnull(jSubCat):
                jMatch = (jColumn==jSubCat)
            else:
                jMatch = pd.isnull(jColumn)
            iMask = (iMask & jMatch)
        CatMask[tuple(iCat)] = iMask
    return CatMask
# 准备回归的数据 
Example 8
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 6 votes vote down vote up
def neutralize(Y, X, cov_matrix, mask=None, constant=False, dummy_data=None, drop_dummy_na=False, other_handle='填充None'):
    """Remove from Y the part explained by X, weighting by ``cov_matrix``.

    Rows selected by ``mask`` (all rows when None) are passed through
    ``prepareRegressData``; rows that are also usable in ``cov_matrix`` receive
    ``YY - XX (XX' C XX)^-1 XX' C YY`` with C the restricted matrix. Every
    other row is NaN, except that rows outside ``mask`` keep their original Y
    value when ``other_handle`` is "保持不变".
    """
    Rslt = np.full(Y.shape, np.nan, dtype='float')
    if mask is None:
        mask = pd.isnull(Rslt)  # everything is NaN here, i.e. select all rows
    cov_matrix = cov_matrix[mask,:][:,mask]
    MaskedX = (X[mask] if X is not None else X)
    MaskedDummy = (dummy_data[mask] if dummy_data is not None else dummy_data)
    NotNAMask,_,YY,XX = prepareRegressData(Y[mask], MaskedX, has_constant=constant, dummy_data=MaskedDummy,drop_dummy_na=drop_dummy_na)
    # rows with at least one non-null entry in the matrix ...
    HasCov = (np.sum(pd.notnull(cov_matrix),axis=1)>0)
    # ... that are fully observed against those rows and usable for the regression
    Usable = ((np.sum(pd.isnull(cov_matrix[:,HasCov]),axis=1)==0) & HasCov & NotNAMask)
    cov_matrix = cov_matrix[Usable,:][:,Usable]
    RowSelector = Usable[NotNAMask]
    YY = YY[RowSelector]
    XX = XX[RowSelector]
    if XX.ndim==1:
        XX = np.reshape(XX,(XX.shape[0],1))
    Gram = np.dot(np.dot(XX.T,cov_matrix),XX)
    Projector = np.dot(np.dot(np.dot(XX,np.linalg.inv(Gram)),XX.T),cov_matrix)
    Masked = Rslt[mask]
    Masked[Usable] = YY - np.dot(Projector,YY)
    Rslt[mask] = Masked
    if other_handle=="保持不变":
        Rslt[~mask] = Y[~mask]
    return Rslt
# 合并因子数据
# data: 待处理的数据, [array,...] or array; method: 合成方式, 可选: 直接合成, 归一合成; nan_handle: 缺失处理, 可选: 剩余合成, 填充None 
Example 9
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 6 votes vote down vote up
def merge(data, mask=None, weight=None, method='直接合成', nan_handle='剩余合成'):
    """Combine several data columns into one weighted series.

    data: 2-D array with one column per input, a 1-D array (treated as a
        single column), or a sequence of equally long arrays.
    mask: boolean array of rows to keep; other rows are NaN in the result.
        Defaults to all rows.
    weight: one weight per column; defaults to equal weights summing to 1.
    method: '直接合成' (use the weights as given) or '归一合成' (rescale the
        weights so their absolute values sum to 1).
    nan_handle: '填充None' (a row with any missing value yields NaN) or
        '剩余合成' (combine the values that are present; with '归一合成' the
        result is additionally divided by the total absolute weight present).

    Returns a 1-D float array with one value per row.
    Raises ValueError for an unknown ``nan_handle``; this used to surface as
    an UnboundLocalError further down.
    """
    if nan_handle not in ('填充None', '剩余合成'):
        raise ValueError("Unsupported nan_handle: {!r}".format(nan_handle))
    if not isinstance(data,np.ndarray):
        data = np.array(list(zip(*data)))
    elif data.ndim==1:
        data = np.reshape(data,(data.shape[0],1))
    if mask is None:
        mask = (np.zeros(data.shape[0])==0)
    if weight is None:
        weight = np.ones(data.shape[1])/data.shape[1]
    else:
        weight = np.array(weight)
    if method=='归一合成':
        weight = weight/np.sum(np.abs(weight))
    if nan_handle=='填充None':
        StdData = np.sum(data*weight,axis=1)
    else:
        StdData = np.nansum(data*weight,axis=1)
        if method=="归一合成":
            # renormalise by the weight of the columns actually observed in each row
            TotalWeight = np.sum(pd.notnull(data)*np.abs(weight),axis=1)
            TotalWeight[TotalWeight==0] = np.nan
            StdData = StdData/TotalWeight
    StdData[~mask] = np.nan
    return StdData
Example 10
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 6 votes vote down vote up
def genPortfolioByFiltration(factor_data, ascending=False, target_num=20, target_quantile=0.1, weight=None):
    """Pick the best-ranked IDs by factor value and return normalised weights.

    factor_data: Series of factor values indexed by ID; null values are dropped.
    ascending: True when small factor values rank first, False when large ones do.
    target_num: keep at most this many top-ranked IDs (None = no limit).
    target_quantile: keep only IDs inside the best ``target_quantile`` fraction
        (None = no limit). With ``ascending`` that means values at or below
        the ``target_quantile`` quantile; otherwise values at or above the
        ``1 - target_quantile`` quantile. The non-ascending branch used to
        repeat the ascending condition and therefore picked the wrong tail.
    weight: Series of raw weights indexed by ID; None means equal weights
        (previously None raised TypeError).

    Both filters are intersected. Null and zero weights are removed and the
    remaining weights are rescaled to sum to 1.
    """
    factor_data = factor_data[pd.notnull(factor_data)]
    factor_data = factor_data.sort_values(inplace=False,ascending=ascending)
    if target_num is not None:
        TargetIDs = set(factor_data.iloc[:target_num].index)
    else:
        TargetIDs = set(factor_data.index)
    if target_quantile is not None:
        if ascending:
            QuantileMask = (factor_data<=factor_data.quantile(target_quantile))
        else:
            QuantileMask = (factor_data>=factor_data.quantile(1-target_quantile))
        TargetIDs = TargetIDs.intersection(factor_data[QuantileMask].index)
    TargetIDs = sorted(TargetIDs)
    if weight is None:
        Portfolio = pd.Series(1.0, index=TargetIDs)
    else:
        Portfolio = weight[TargetIDs]
    Portfolio = Portfolio[pd.notnull(Portfolio) & (Portfolio!=0)]
    return Portfolio/Portfolio.sum()
# 生成期货连续合约的价格序列
# id_map: 连续合约每一期的月合约 ID, Series(ID)
# price: 月合约的价格序列, DataFrame(价格, index=id_map.index, columns=[月合约ID])
# adj_direction: 调整方向, 可选: "前复权"(最后一期价格不变), "后复权"(第一期价格不变)
# adj_type: 调整方式, 可选: "收益率不变", "价差不变","价格不变"
# rollover_ahead: 合约展期是否提前一期, bool
# 返回: Series(价格, index=id_map.index) 
Example 11
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py    GNU General Public License v3.0 6 votes vote down vote up
def plotCDF(self):# empirical distribution plot
        """Plot the empirical CDF of the single selected column with plotly.

        Exactly one column must be selected, otherwise an error dialog is shown
        and its result returned. The sorted non-null values are padded with one
        extra point on each side, and a normal CDF with the sample mean and
        standard deviation is drawn for comparison. Returns 0 on success.
        """
        if len(self.getSelectedColumns())!=1:
            return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        Frame,Msg = self.getSelectedDF(all_num=True)
        if Frame is None:
            return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        Column = Frame.iloc[:,0]
        Sample = Column[pd.notnull(Column)].values
        Sample.sort()
        nSample = Sample.shape[0]
        # pad by one average spacing on both sides so the step function has flat ends
        Step = (Sample[-1]-Sample[0])/nSample
        xData = np.append(Sample[0]-Step,Sample)
        xData = np.append(xData,xData[-1]+Step)
        yData = (np.linspace(0,nSample+1,nSample+2))/(nSample)
        yData[-1] = yData[-2]
        Traces = [plotly.graph_objs.Scatter(x=xData,y=yData,name="经验分布函数")]
        xNormal = np.linspace(xData[0],xData[-1],(nSample+2)*10)
        # xData[1:-1] is the unpadded sample
        yNormal = stats.norm.cdf(xNormal,loc=np.mean(xData[1:-1]),scale=np.std(xData[1:-1]))
        Traces.append(plotly.graph_objs.Scatter(x=xNormal,y=yNormal,name="Normal Distribution"))
        Figure = {"data":Traces,"layout": plotly.graph_objs.Layout(title="经验分布")}
        with tempfile.TemporaryFile() as File:
            plotly.offline.plot(Figure, filename=File.name)
        return 0
Example 12
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py    GNU General Public License v3.0 6 votes vote down vote up
def calStatistics(self):# summary statistics
        """Show a table of summary statistics for every selected column.

        For each column of the selected (numeric) data, restricted to the rows
        returned by ``self._getDataIndex``, the count of non-null values, mean,
        median, variance, standard deviation, maximum, minimum, sum and product
        are computed and displayed in a dialog. Returns 0, or the result of the
        error dialog when no data could be selected.
        """
        SelectedDF, Msg = self.getSelectedDF(all_num=True)
        if SelectedDF is None: return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        # rows over which the statistics are computed
        SelectedIndex = self._getDataIndex(list(SelectedDF.index))
        StatNames = ['数量','均值','中位数','方差','标准差','最大值','最小值','总和','总积']
        SummaryData = pd.DataFrame(index=StatNames,columns=[str(iCol) for iCol in SelectedDF.columns])
        for i in range(SelectedDF.shape[1]):
            iData = SelectedDF.iloc[:,i].loc[SelectedIndex]
            iStats = {
                '数量': iData[pd.notnull(iData)].shape[0],
                '均值': iData.mean(),
                '中位数': iData.median(),
                '方差': iData.var(),
                '标准差': iData.std(),
                '最大值': iData.max(),
                '最小值': iData.min(),
                '总和': iData.sum(),
                '总积': iData.prod(),
            }
            # Write with a single positional indexer: the former
            # ``SummaryData.loc[row].iloc[i] = ...`` is chained assignment, which
            # may update a temporary copy instead of SummaryData.
            for j,jName in enumerate(StatNames):
                SummaryData.iloc[j, i] = iStats[jName]
        TableWidget = QtWidgets.QTableWidget()
        populateTableWithDataFrame(TableWidget, SummaryData)
        _TableDlg(None, TableWidget).exec_()
        return 0
Example 13
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py    GNU General Public License v3.0 6 votes vote down vote up
def plotHist(self):
        """Draw a histogram of the single selected column with a normal PDF overlay.

        Exactly one column must be selected, otherwise an error dialog is shown
        and its result returned. The number of bins is asked for interactively;
        cancelling that dialog returns 0 without plotting. Returns 0 on success.
        """
        if len(self.getSelectedColumns())!=1:
            return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        Frame, Msg = self.getSelectedDF(all_num=True)
        if Frame is None:
            return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        Column = Frame.iloc[:,0]
        nBin,Accepted = QtWidgets.QInputDialog.getInt(self, "获取分组数", "分组数", value=10, min=1, max=1000, step=1)
        if not Accepted:
            return 0
        FigDlg = _MatplotlibWidget()
        Axes = FigDlg.Mpl.Fig.add_subplot(111)
        Sample = Column[pd.notnull(Column)].values
        # evaluation grid for the reference normal density
        Grid = np.linspace(np.min(Sample),np.max(Sample),len(Sample)*10)
        NormalPDF = stats.norm.pdf(Grid,loc=np.mean(Sample),scale=np.std(Sample))
        Axes.hist(Sample, nBin, density=True, label='直方图', color="b")
        Axes.plot(Grid, NormalPDF, label='Normal Distribution', linewidth=2, color='r')
        Axes.legend(loc='upper left', shadow=True)
        FigDlg.Mpl.draw()
        FigDlg.show()
        return 0
Example 14
Project: QuantStudio   Author: Scorpi000   File: ResultDlg.py    GNU General Public License v3.0 6 votes vote down vote up
def plotCDF(self):
        """Plot the empirical CDF of the single selected column with matplotlib.

        Exactly one column must be selected, otherwise an error dialog is shown
        and its result returned. The sorted non-null values are padded with one
        extra point on each side, and a normal CDF with the sample mean and
        standard deviation is drawn for comparison. Returns 0 on success.
        """
        if len(self.getSelectedColumns())!=1:
            return QtWidgets.QMessageBox.critical(self, "错误", "请选择一列!")
        Frame, Msg = self.getSelectedDF(all_num=True)
        if Frame is None:
            return QtWidgets.QMessageBox.critical(self, "错误", Msg)
        Column = Frame.iloc[:,0]
        FigDlg = _MatplotlibWidget()
        Axes = FigDlg.Mpl.Fig.add_subplot(111)
        Sample = Column[pd.notnull(Column)].values
        Sample.sort()
        nSample = len(Sample)
        # pad by one average spacing on both sides so the step function has flat ends
        Step = (Sample[-1]-Sample[0])/nSample
        xData = np.append(Sample[0]-Step,Sample)
        xData = np.append(xData,xData[-1]+Step)
        yData = (np.linspace(0,nSample+1,nSample+2))/(nSample)
        yData[-1] = yData[-2]
        Axes.plot(xData,yData,label='经验分布函数',linewidth=2,color='b')
        xNormal = np.linspace(xData[0],xData[-1],(nSample+2)*10)
        # xData[1:-1] is the unpadded sample
        yNormal = stats.norm.cdf(xNormal,loc=np.mean(xData[1:-1]),scale=np.std(xData[1:-1]))
        Axes.plot(xNormal, yNormal, label='Normal Distribution', linewidth=2, color='r')
        Axes.legend(loc='upper left',shadow=True)
        FigDlg.Mpl.draw()
        FigDlg.show()
        return 0
Example 15
Project: QuantStudio   Author: Scorpi000   File: AuxiliaryFun.py    GNU General Public License v3.0 6 votes vote down vote up
def getClassMask(subclass,class_data):
    """Return the boolean mask of rows whose class labels equal ``subclass``.

    subclass: sequence with one label per column of ``class_data``; a null
        label matches rows whose value in that column is null. None selects
        every row.
    class_data: 2-D numpy array or DataFrame, one column per class variable.

    The mask is a numpy array for array input and a Series (sharing the
    index of ``class_data``) otherwise.
    """
    IsArray = isinstance(class_data, np.ndarray)
    if IsArray:
        Mask = np.array([True]*class_data.shape[0])
    else:
        Mask = pd.Series(True,index=class_data.index)
    if subclass is None:
        return Mask
    for j,jSubClass in enumerate(subclass):
        jColumn = (class_data[:,j] if IsArray else class_data.iloc[:,j])
        if pd.notnull(jSubClass):
            jMatch = (jColumn==jSubClass)
        else:
            jMatch = pd.isnull(jColumn)
        Mask = Mask & jMatch
    return Mask

# 使得两个Series相匹配, 即 index 一致, 缺失的按照指定值填充 
Example 16
Project: QuantStudio   Author: Scorpi000   File: DataTypeConversionFun.py    GNU General Public License v3.0 6 votes vote down vote up
def DummyVarTo01Var(dummy_var,ignore_na=False,ignores=[],ignore_nonstring=False):
    """Expand a categorical Series into a DataFrame of 0/1 indicator columns.

    dummy_var: Series of class labels. Unless ``ignore_na`` is set, its null
        entries are overwritten with NaN in place.
    ignore_na: when True, missing values get no indicator column; otherwise a
        NaN-labelled column marks the rows that are missing.
    ignores: labels for which no column is produced (only read, never mutated).
    ignore_nonstring: when True, only string labels (and the null label) are kept.

    Returns a float DataFrame indexed like ``dummy_var`` with one column per
    retained label, or an empty DataFrame when ``dummy_var`` is empty.
    """
    if dummy_var.shape[0]==0:
        return pd.DataFrame()
    NAMask = pd.isnull(dummy_var)
    if ignore_na:
        AllClasses = dummy_var[~NAMask].unique()
    else:
        dummy_var[NAMask] = np.nan
        AllClasses = dummy_var.unique()
    AllClasses = [iClass for iClass in AllClasses if (iClass not in ignores) and ((not ignore_nonstring) or isinstance(iClass,str) or pd.isnull(iClass))]
    OZVar = pd.DataFrame(0.0,index=dummy_var.index,columns=AllClasses,dtype='float')
    for k,iClass in enumerate(AllClasses):
        if pd.notnull(iClass):
            iMask = (dummy_var==iClass)
        else:
            iMask = NAMask
        # Single positional write: ``OZVar[iClass][iMask] = 1.0`` was chained
        # assignment and may update a temporary copy instead of OZVar. Working
        # by position also avoids looking up a NaN column label.
        OZVar.iloc[np.asarray(iMask, dtype=bool), k] = 1.0
    return OZVar
# 将DataFrame转化成二重索引的Series,DataFrame的index和columns二重索引。 
Example 17
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 6 votes vote down vote up
def calcBlendingCoefficient(specific_ret):
    """Compute one coefficient in [0, 1] per column of ``specific_ret``.

    For each column the non-null values are clipped at +/-10 robust standard
    deviations (interquartile range / 1.35). The coefficient is the product of
    a history-length term, ``(n - 60) / 120`` limited to [0, 1], and a shape
    term, ``exp(1 - Z)`` limited to [0, 1], where Z is the relative gap between
    the ordinary and the robust standard deviation. Columns without data, and
    null results, get 0.

    Returns a Series named 'Gamma' indexed by the column labels.
    """
    Gamma = {}
    for iID in specific_ret.columns:
        iRet = specific_ret[iID]
        iRet = iRet[pd.notnull(iRet)].values
        nObs = iRet.shape[0]
        if nObs==0:
            Gamma[iID] = 0
            continue
        iUpperQuartile = np.percentile(iRet,75)
        iLowerQuartile = np.percentile(iRet,25)
        iRobustStd = 1/1.35*(iUpperQuartile-iLowerQuartile)
        # clip outliers in place (iRet is a private copy of the column values)
        iBound = 10*iRobustStd
        iRet[iRet>iBound] = iBound
        iRet[iRet<-iBound] = -iBound
        iStd = np.std(iRet)
        iZVal = np.abs((iStd-iRobustStd)/iRobustStd)
        iLengthTerm = min((1,max((0,(nObs-60)/120))))
        iShapeTerm = min((1,max((0,np.exp(1-iZVal)))))
        Gamma[iID] = iLengthTerm*iShapeTerm
    Gamma = pd.Series(Gamma,name='Gamma')
    Gamma[pd.isnull(Gamma)] = 0
    return Gamma
    
# 计算Structural forecast of specific risk 
Example 18
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 6 votes vote down vote up
def calcSTRSpecificRisk(gamma, std_ts, factor_data, cap):
    """Compute the structural forecast of specific risk.

    The log of ``std_ts`` is regressed on ``factor_data`` for the IDs whose
    ``gamma`` equals 1, using weights from ``calcRegressWeight(cap)``. The
    fitted model is applied to all rows of ``factor_data`` and scaled by E0,
    the weighted average ratio of observed to fitted values in the sample.

    Returns a Series indexed like ``std_ts``.
    """
    # Prepare the regression sample: IDs whose gamma equals 1
    SampleIDs = gamma[gamma==1].index.tolist()
    SampleStd = std_ts.loc[SampleIDs]
    SampleStd[SampleStd==0] = np.nan  # presumably so that np.log below never sees 0 - confirm
    SampleFactor = factor_data.loc[SampleIDs, :]
    # drop factor columns that are zero for the whole sample
    SampleFactor = SampleFactor.loc[:, SampleFactor.abs().sum()!=0]
    Weight = calcRegressWeight(cap).loc[SampleIDs].values
    # Regression
    Coef = regressWithOneLinearEqConstraint(np.log(SampleStd.values), SampleFactor.values, Weight)
    # Estimate the scale multiplier
    Ratio = SampleStd.values / np.exp(np.dot(SampleFactor.values, Coef))
    Valid = (pd.notnull(Ratio) & pd.notnull(Weight))
    E0 = np.nansum(Ratio[Valid] * Weight[Valid]) / np.nansum(Weight[Valid])
    # Structural forecast of specific risk for all IDs
    Fitted = np.exp(np.dot(factor_data.loc[:, SampleFactor.columns].values, Coef))
    return pd.Series(Fitted * E0, index=std_ts.index)

# 估计特异性风险, 使用Barra EUE3的方法, 参见EUE3
# specific_ret: DataFrame(收益率,index=[日期],columns=[ID]); forcast_num: 向前预测的期数;
# auto_corr_num: 考虑有自相关性的最大期数; half_life: 时间指数权重半衰期; 
Example 19
Project: QuantStudio   Author: Scorpi000   File: RiskModelFun.py    GNU General Public License v3.0 6 votes vote down vote up
def BayesianShrinkage(specific_risk, cap,quantile_num=10, q=0.1):
    """Shrink each specific risk towards the cap-weighted mean of its size bucket.

    IDs with a non-null ``specific_risk`` are split into ``quantile_num``
    buckets by quantiles of ``cap``. Within a bucket the cap-weighted mean
    risk is the shrinkage target, and each ID gets the weight
    ``v = q*|risk - target| / (q*|risk - target| + delta)`` on its own value,
    with delta the mean squared deviation from the target.

    Returns a Series indexed like the original ``specific_risk``; IDs that fall
    in no bucket stay NaN.
    """
    Rslt = pd.Series(np.nan,index=specific_risk.index)
    Valid = pd.notnull(specific_risk)
    specific_risk, cap = specific_risk[Valid], cap[Valid]
    iLast = quantile_num-1
    for i in range(quantile_num):
        if i==0:
            # lowest bucket: open below
            InBucket = (cap<cap.quantile((i+1)/quantile_num))
        elif i==iLast:
            # highest bucket: open above
            InBucket = (cap>=cap.quantile(i/quantile_num))
        else:
            InBucket = (cap>=cap.quantile(i/quantile_num)) & (cap<cap.quantile((i+1)/quantile_num))
        iIDs = cap[InBucket].index
        iRisk = specific_risk[iIDs]
        iCap = cap[iIDs]
        iTarget = (iRisk*iCap).sum()/iCap.sum()
        iDeviation = iRisk-iTarget
        iDelta = (iDeviation**2).sum()/iRisk.shape[0]
        iScaled = q*iDeviation.abs()
        iv = iScaled/(iScaled+iDelta)
        Rslt[iIDs] = iRisk*iv+(1-iv)*iTarget
    return Rslt

# Volatility Regime Adjustment
# ret: DataFrame(收益率,index=[日期(频率为日)],columns=[ID或者因子]);
# forcast_volitility: DataFrame(波动率预测,index=[预测日期],columns=[ID或者因子]);
# half_life: 计算乘子的半衰期; forcast_num: 预测期数, 如果为<=0的数据则用forcast_volitility的日期间隔计算收益
# 返回调整乘子 
Example 20
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, name, fdb, sys_args={}, **kwargs):
        """Read the table's metadata from ``fdb`` and then run the parent initialiser.

        The database table name, ID field, condition fields and security type
        come from ``fdb._TableInfo`` / ``fdb._FactorInfo`` for table ``name``.
        When the table declares a main table, the main table name (prefixed),
        its ID, the join condition and the optional main table condition are
        stored and IDs are treated as strings. Otherwise the table acts as its
        own main table.
        """
        TableInfo = fdb._TableInfo
        self._DBTableName = fdb.TablePrefix + TableInfo.loc[name, "DBTableName"]
        self._FactorInfo = fdb._FactorInfo.loc[name]
        FactorInfo = self._FactorInfo
        IsIDField = (FactorInfo["FieldType"]=="ID")
        self._IDField = FactorInfo["DBFieldName"][IsIDField].iloc[0]# ID field
        self._IDFieldIsStr = (_identifyDataType(FactorInfo["DataType"][FactorInfo["FieldType"]=="ID"].iloc[0])!="double")
        self._ConditionFields = FactorInfo[FactorInfo["FieldType"]=="Condition"].index.tolist()# list of all condition fields
        MainTableName = TableInfo.loc[name, "MainTableName"]
        if pd.notnull(MainTableName):
            self._MainTableName = fdb.TablePrefix + MainTableName
            self._MainTableID = TableInfo.loc[name, "MainTableID"]
            self._JoinCondition = TableInfo.loc[name, "JoinCondition"].format(DBTable=self._DBTableName, MainTable=self._MainTableName)
            MainTableCondition = TableInfo.loc[name, "MainTableCondition"]
            if pd.notnull(MainTableCondition):
                MainTableCondition = MainTableCondition.format(MainTable=self._MainTableName)
            self._MainTableCondition = MainTableCondition
            self._IDFieldIsStr = True
        else:
            # no separate main table: the table itself plays that role
            self._MainTableName = self._DBTableName
            self._MainTableID = self._IDField
            self._MainTableCondition = None
        self._SecurityType = TableInfo.loc[name, "SecurityType"]
        return super().__init__(name=name, fdb=fdb, sys_args=sys_args, **kwargs)
Example 21
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def __QS_prepareRawData__(self, factor_names, ids, dts, args={}):
        """Fetch the raw rows for ``factor_names`` restricted to ``ids``.

        Builds one SELECT statement (ID plus the factor fields, filtered by the
        ID list, the optional main table condition and the argument-dependent
        conditions, ordered by ID), runs it through ``self._FactorDB`` and
        returns the result as a DataFrame with columns ``["ID"] + factor_names``
        whose IDs are strings. ``dts`` is not used here.
        """
        # Build the SQL statement: ID, factor data
        IDField = self._getIDField()
        FieldSQLStr, SETableJoinStr = self._genFieldSQLStr(factor_names)
        FromSQLStr = self._genFromSQLStr(setable_join_str=SETableJoinStr)
        IDCondition = genSQLInCondition(self._MainTableName+"."+self._MainTableID, deSuffixID(ids), is_str=self._IDFieldIsStr, max_num=1000)
        SQLParts = ["SELECT "+IDField+" AS ID, ", FieldSQLStr+" ", FromSQLStr+" ", "WHERE ("+IDCondition+") "]
        if pd.notnull(self._MainTableCondition):
            SQLParts.append("AND "+self._MainTableCondition+" ")
        SQLParts.append(self._genConditionSQLStr(args=args)+" ")
        SQLParts.append("ORDER BY ID")
        Rows = self._FactorDB.fetchall("".join(SQLParts))
        Columns = ["ID"]+factor_names
        if not Rows:
            return pd.DataFrame(columns=Columns)
        RawData = pd.DataFrame(np.array(Rows, dtype="O"), columns=Columns)
        RawData = self._adjustRawDataByRelatedField(RawData, factor_names)
        RawData["ID"] = [str(iID) for iID in RawData["ID"]]
        return RawData
Example 22
Project: QuantStudio   Author: Scorpi000   File: JYDB.py    GNU General Public License v3.0 6 votes vote down vote up
def getID(self, ifactor_name=None, idt=None, args={}):
        """Return the distinct IDs of the table, ordered by ID.

        With ``idt`` given, only rows whose start date is on or before ``idt``
        and whose end date is after it (or on it, when ``self._EndDateIncluded``)
        are considered; otherwise every row with a non-null start date. Rows
        with a null ID field are always excluded. ``ifactor_name`` is not used.
        """
        FieldPrefix = self._DBTableName+"."
        Clauses = ["SELECT DISTINCT "+self._getIDField()+" AS ID ", self._genFromSQLStr()+" "]
        if idt is None:
            Clauses.append("WHERE "+FieldPrefix+self._StartDateField+" IS NOT NULL ")
        else:
            QuotedDate = "'"+idt.strftime("%Y-%m-%d")+"' "
            EndOperator = (">=" if self._EndDateIncluded else ">")
            Clauses.append("WHERE "+FieldPrefix+self._StartDateField+"<="+QuotedDate)
            Clauses.append("AND "+FieldPrefix+self._EndDateField+EndOperator+QuotedDate)
        Clauses.append("AND "+FieldPrefix+self._IDField+" IS NOT NULL ")
        if pd.notnull(self._MainTableCondition):
            Clauses.append("AND "+self._MainTableCondition+" ")
        Clauses.append(self._genConditionSQLStr(args=args)+" ")
        Clauses.append("ORDER BY ID")
        return [iRow[0] for iRow in self._FactorDB.fetchall("".join(Clauses))]
    # 返回给定 ID iid 的起始日期距今的时点序列
    # 如果 idt 为 None, 将以表中最小的起始日期作为起点
    # 忽略 ifactor_name 
Example 23
Project: gullikson-scripts   Author: kgullikson88   File: Sensitivity.py    MIT License 5 votes vote down vote up
def split_by_component(df):
    """Return one (comp, SpT) row per distinct component label.

    The first and last characters of ``Comp`` are taken as the primary and
    secondary component labels and paired with ``Sp1`` and ``Sp2``; the two
    sets are stacked and only the first row per label is kept.

    Side effect: the columns ``prim_comp`` and ``sec_comp`` are added to ``df``.
    """
    def _first_char(label):
        return label[0]

    def _last_char(label):
        return label[-1]

    df['prim_comp'] = df.Comp.map(_first_char)
    df['sec_comp'] = df.Comp.map(_last_char)

    stacked = pd.concat((df[['prim_comp', 'Sp1']], df[['sec_comp', 'Sp2']]))

    primary_rows = stacked.loc[stacked.prim_comp.notnull()]
    primary_rows = primary_rows.rename(columns={'Sp1': 'SpT', 'prim_comp': 'comp'})
    secondary_rows = stacked.loc[stacked.sec_comp.notnull()]
    secondary_rows = secondary_rows.rename(columns={'Sp2': 'SpT', 'sec_comp': 'comp'})

    combined = pd.concat((primary_rows, secondary_rows))
    return combined[['comp', 'SpT']].drop_duplicates(subset='comp')
Example 24
Project: techa   Author: havocesp   File: overlap.py    The Unlicense 5 votes vote down vote up
def KAMA(data, period=20, er_period=10, fast=2, slow=30, price='close'):
    """
    Kaufman's Adaptive Moving Average

    KAMA is a moving average designed to account for market noise or volatility.

    Its main advantage is that it takes into consideration not just the direction, but the market
    volatility as well.

    :param pd.DataFrame data: pandas DataFrame with open, high, low, close data
    :param int period: period used for indicator calculation
    :param int er_period: period used for indicator calculation
    :param int fast: fast period used for indicator calculation
    :param int slow: slow period used for indicator calculation
    :param str price: column used for indicator calculation (default = "close")
    :return pd.Series: with indicator data calculation results
    """
    efficiency = ER(data, er_period)
    fast_alpha = 2 / (fast + 1)
    slow_alpha = 2 / (slow + 1)

    # smoothing constant
    # noinspection PyTypeChecker
    smoothing = pd.Series((efficiency * (fast_alpha - slow_alpha) + slow_alpha) ** 2)
    sma_ = SMA(data, period, price)

    values = []

    for alpha, seed, current in zip(smoothing, sma_.shift(-1), data[price]):
        try:
            # regular step: move the previous value towards the current price
            values.append(values[-1] + alpha * (current - values[-1]))
        except (IndexError, TypeError):
            # no previous value yet (empty list or None): start from the
            # shifted SMA once it is available
            values.append(seed + alpha * (current - seed) if pd.notnull(seed) else None)

    return pd.Series(values, index=sma_.index, name='KAMA')
Example 25
Project: recordlinkage   Author: J535D165   File: base.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _make_index_names(self, name1, name2):
        """Return the two index names, disambiguated when they clash.

        When both names are non-null and equal, each gets one of the two
        entries of ``self.suffixes`` appended; otherwise they are returned as
        given.
        """
        both_named = pandas.notnull(name1) and pandas.notnull(name2)
        if not (both_named and (name1 == name2)):
            return [name1, name2]

        left_suffix = self.suffixes[0]
        right_suffix = self.suffixes[1]
        return ["{}{}".format(name1, left_suffix),
                "{}{}".format(name1, right_suffix)]
Example 26
Project: SA-LSTM   Author: hobincar   File: MSVD.py    MIT License 5 votes vote down vote up
def load_captions(self):
        """Return the non-null English descriptions from the caption CSV as an array."""
        table = pd.read_csv(self.caption_fpath)
        english = table[table['Language'] == 'English']
        described = english.dropna(subset=['Description'])
        return described['Description'].values
Example 27
Project: SA-LSTM   Author: hobincar   File: MSVD.py    MIT License 5 votes vote down vote up
def load_captions(self):
        """Collect the English captions of every clip into ``self.captions``.

        Rows with a null description are skipped. Captions are appended under
        the key ``"<VideoID>_<Start>_<End>"``.
        """
        table = pd.read_csv(self.caption_fpath)
        usable = (table['Language'] == 'English') & pd.notnull(table['Description'])
        clips = table.loc[usable, [ 'VideoID', 'Start', 'End', 'Description' ]]

        for record in clips.values:
            video_id, start, end, caption = record
            clip_key = "{}_{}_{}".format(video_id, start, end)
            self.captions[clip_key].append(caption)
Example 28
Project: SA-LSTM   Author: hobincar   File: MSVD.py    MIT License 5 votes vote down vote up
def load_metadata():
    """Load the caption CSV at ``C.caption_fpath``, keeping English rows that have a description.

    The returned DataFrame is re-indexed from 0.
    """
    table = pd.read_csv(C.caption_fpath)
    is_english = table['Language'] == 'English'
    english = table[is_english]
    described = english[pd.notnull(english['Description'])]
    return described.reset_index(drop=True)
Example 29
Project: grafana-csv-datasource   Author: SmartBlug   File: PythonServer.py    MIT License 5 votes vote down vote up
def dataframe_to_json_table(target, df):
    """Convert a DataFrame into a one-element list holding a table response.

    The table dict has ``type`` 'table', ``columns`` (one ``{"text": name}``
    per column) and ``rows`` (a list of row lists in which missing values are
    None, so they serialise as JSON null).

    An empty ``df`` yields an empty list. A non-empty object that is not a
    DataFrame triggers ``abort(404, ...)``. ``target`` is not used here.
    """
    response = []

    if df.empty:
        return response

    if isinstance(df, pd.DataFrame):
        # Cast to object first: on float columns ``where(..., None)`` otherwise
        # leaves NaN in place, and NaN is not valid JSON.
        rows = df.astype(object).where(pd.notnull(df), None).values.tolist()
        response.append({'type': 'table',
                         'columns': [{"text": col} for col in df.columns],
                         'rows': rows})
    else:
        abort(404, Exception('Received object is not a dataframe.'))

    return response
Example 30
Project: msdas   Author: cokelaer   File: replicates.py    GNU General Public License v3.0 5 votes vote down vote up
def set_irrelevant_replicates_to_na(self):
        """Set measurements backed by at most one replicate to NA.

        If an experiment has no replicates (0 or 1 valid values), you may want
        to set the experiment to NA. This is not relevant if there are zero
        replicates since the value may already be an NA but may make sense
        when there is only one replicate, for which no errors can be obtained.

        The method modifies ``self.df`` in place and returns nothing.


        .. plot::
            :include-source:
            :width: 70%

            from msdas import *
            r = ReplicatesYeast(get_yeast_raw_data(), verbose=False)
            r.set_irrelevant_replicates_to_na()
            r.hist_na_per_experiments(color="r", alpha=0.5)
            r.reset()
            r.hist_na_per_experiments(color="g", alpha=0.5)

        """
        for tag in self.get_unique_measurement_name():
            df = self.get_replicates_from_one_unique_measurement(tag)
            # Rows holding at most one non-null replicate for this measurement.
            too_few = pd.notnull(df).sum(axis=1) <= 1
            indices = too_few[too_few].index.tolist()
            # Columns of self.df whose name prefix (before the first ".") is the tag.
            colnames = [c for c in self.df.columns if c.split(".")[0] == tag]
            if not indices or not colnames:
                continue
            # Label-based .loc replaces the .ix indexer, which no longer exists
            # in current pandas (as does Series.iteritems, avoided above).
            self.df.loc[indices, colnames] = np.nan
Example 31
Project: msdas   Author: cokelaer   File: replicates.py    GNU General Public License v3.0 5 votes vote down vote up
def _get_na_count_per_experiment(self):
        """Return, per measurement tag, the number of NA replicates in each row.

        The result is a dict mapping every tag from
        ``get_unique_measurement_name()`` to a per-row count computed as the
        number of replicate columns minus the number of non-null values.
        """
        counts = {}
        for tag in self.get_unique_measurement_name():
            replicates = self.get_replicates_from_one_unique_measurement(tag)
            n_replicates = len(replicates.columns)
            n_present = pd.notnull(replicates).sum(axis=1)
            counts[tag] = (n_replicates - n_present).copy()
        return counts
Example 32
Project: msdas   Author: cokelaer   File: readers.py    GNU General Public License v3.0 5 votes vote down vote up
def get_na_count(self):
        """Return vector with number of NA per row (psite/protein)"""
        n_columns = len(self.df.columns)
        n_present = pd.notnull(self.df).sum(axis=1)
        return n_columns - n_present
Example 33
Project: QuantStudio   Author: Scorpi000   File: ReturnBasedModel.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        """Process time point ``idt``: extend the NAV histories and run one rolling regression per target.

        The NAV rows read for ``idt`` are appended to the arrays stored in
        ``self._Output``. When ``idt`` is a calculation time point and enough
        history has accumulated, each target's returns are regressed on the
        style returns with coefficients boxed to [0, 1] and summing to 1, and
        the coefficients and R-squared are appended to the rolling outputs.

        Always returns 0.
        """
        # Skip time points that were already handled.
        if self._iDT==idt: return 0
        self._iDT = idt
        # Append the current target / style NAV rows to the accumulated histories.
        TargetNAV = self._TargetTable.readData(dts=[idt], ids=self._Output["目标ID"], factor_names=[self.TargetNAV]).iloc[0, :, :].values
        self._Output["目标净值"] = np.r_[self._Output["目标净值"], TargetNAV]
        StyleNAV = self._StyleTable.readData(dts=[idt], ids=self._Output["风格ID"], factor_names=[self.StyleNAV]).iloc[0, :, :].values
        self._Output["风格指数净值"] = np.r_[self._Output["风格指数净值"], StyleNAV]
        if self.CalcDTs:
            # Only compute on the configured calculation time points at or after the current position.
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
        # Not enough accumulated rows for a regression yet.
        if self._Output["目标净值"].shape[0]-1<self.MinSummaryWindow: return 0
        # First row of the rolling window (clamped at 0).
        StartInd = int(max(0, self._Output["目标净值"].shape[0] - 1 - self.SummaryWindow))
        X = _calcReturn(self._Output["风格指数净值"][StartInd:, :], return_type=self.ReturnType)
        Y = _calcReturn(self._Output["目标净值"][StartInd:, :], return_type=self.ReturnType)
        nTargetID, nStyleID = len(self._Output["目标ID"]), len(self._Output["风格ID"])
        Rsquared = np.full((nTargetID, ), np.nan)
        for i, iID in enumerate(self._Output["目标ID"]):
            # Rows where every style return and this target's return are present.
            iMask = ((np.sum(pd.isnull(X), axis=1)==0) & (pd.notnull(Y[:, i])))
            # NOTE(review): the regression is fed the unmasked Y[:, i] and X; iMask is only
            # applied in the R-squared computation below - confirm regressByCVX handles NaN rows.
            try:
                iBeta = regressByCVX(Y[:, i], X, weight=None, constraints={"Box": {"ub": np.ones((nStyleID, )), "lb": np.zeros((nStyleID, ))},
                                                                                                                      "LinearEq": {"Aeq": np.ones((1, nStyleID)), "beq": 1}})
            # NOTE(review): bare except treats any failure (including KeyboardInterrupt) as "no result".
            except:
                iBeta = None
            if iBeta is None:
                # Keep the rolling series aligned by recording a row of NaN coefficients.
                self._Output["滚动回归系数"][iID].append(np.full((nStyleID, ), np.nan))
            else:
                self._Output["滚动回归系数"][iID].append(iBeta)
                # R-squared = 1 - residual sum of squares / total sum of squares on the masked rows.
                Rsquared[i] = 1 - np.nansum((Y[:, i][iMask] - np.dot(X[iMask], iBeta))**2) / np.nansum((Y[:, i][iMask] - np.nanmean(Y[:, i][iMask]))**2)
        self._Output["滚动回归R平方"].append(Rsquared)
        self._Output["时点"].append(idt)
        return 0 
Example 34
Project: QuantStudio   Author: Scorpi000   File: ReturnBasedModel.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finish the run: compute the full-sample regression and convert all outputs to DataFrames.

        Returns 0 immediately when the module was not started; otherwise
        returns 0 after rewriting the entries of ``self._Output``.
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        # The time points and ID lists are removed from the output and used as labels below.
        DTs, StyleIDs, TargetIDs = self._Output.pop("时点"), self._Output.pop("风格ID"), self._Output.pop("目标ID")
        nTargetID, nStyleID = len(TargetIDs), len(StyleIDs)
        # Returns over the whole accumulated NAV history.
        X = _calcReturn(self._Output["风格指数净值"], return_type=self.ReturnType)
        Y = _calcReturn(self._Output["目标净值"], return_type=self.ReturnType)
        self._Output["全样本回归系数"] = np.full(shape=(nStyleID, nTargetID), fill_value=np.nan)
        self._Output["全样本回归R平方"] = np.full(shape=(nTargetID, ), fill_value=np.nan)
        for i, iID in enumerate(TargetIDs):
            # Rows where every style return and this target's return are present.
            iMask = ((np.sum(pd.isnull(X), axis=1)==0) & (pd.notnull(Y[:, i])))
            # NOTE(review): as in the rolling step, the regression receives unmasked data and
            # the bare except maps any failure to "no coefficients".
            try:
                iBeta = regressByCVX(Y[:, i], X, weight=None, constraints={"Box": {"ub": np.ones((nStyleID, )), "lb": np.zeros((nStyleID, ))},
                                                                                                                      "LinearEq": {"Aeq": np.ones((1, nStyleID)), "beq": 1}})
            except:
                iBeta = None
            if iBeta is not None:
                self._Output["全样本回归系数"][:, i] = iBeta
                # R-squared = 1 - residual sum of squares / total sum of squares on the masked rows.
                self._Output["全样本回归R平方"][i] = 1 - np.nansum((Y[:, i][iMask] - np.dot(X[iMask], iBeta))**2) / np.nansum((Y[:, i][iMask] - np.nanmean(Y[:, i][iMask]))**2)
            # Turn this target's list of rolling coefficient rows into a frame indexed by time point.
            # NOTE(review): columns come from self.StyleIDs rather than the popped StyleIDs - confirm they match.
            self._Output["滚动回归系数"][iID] = pd.DataFrame(self._Output["滚动回归系数"][iID], index=DTs, columns=self.StyleIDs)
        self._Output["全样本回归系数"] = pd.DataFrame(self._Output["全样本回归系数"], index=StyleIDs, columns=TargetIDs)
        self._Output["全样本回归R平方"] = pd.DataFrame(self._Output["全样本回归R平方"], index=TargetIDs, columns=["全样本回归R平方"])
        self._Output["滚动回归R平方"] = pd.DataFrame(self._Output["滚动回归R平方"], index=DTs, columns=TargetIDs)
        # The NAV histories are labelled with the model's full time series.
        self._Output["目标净值"] = pd.DataFrame(self._Output["目标净值"], index=self._Model.DateTimeSeries, columns=self.TargetIDs)
        self._Output["风格指数净值"] = pd.DataFrame(self._Output["风格指数净值"], index=self._Model.DateTimeSeries, columns=self.StyleIDs)
        return 0 
Example 35
Project: QuantStudio   Author: Scorpi000   File: BrinsonModel.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        """Process time point ``idt``: record per-group weights and returns for portfolio and benchmark.

        Weights and groups are read at the previous time point, returns are
        measured from the previous time point to ``idt``, and the results are
        written into the row ``idt`` of the output tables. Always returns 0.
        """
        # Skip time points that were already handled.
        if self._iDT==idt: return 0
        self._iDT = idt
        # Determine the previous time point; without one nothing can be computed.
        PreDT = None
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
            if self._CurCalcInd>0: PreDT = self.CalcDTs[self._CurCalcInd - 1]
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
            if self._CurCalcInd>0: PreDT = self._Model.DateTimeSeries[self._CurCalcInd - 1]
        if PreDT is None: return 0
        # Portfolio and benchmark weights at the previous time point; missing weights count as 0.
        Portfolio = self._FactorTable.readData(factor_names=[self.Portfolio, self.BenchmarkPortfolio], dts=[PreDT], ids=self._IDs).iloc[:, 0, :]
        BenchmarkPortfolio, Portfolio = Portfolio.iloc[:, 1], Portfolio.iloc[:, 0]
        Portfolio[pd.isnull(Portfolio)], BenchmarkPortfolio[pd.isnull(BenchmarkPortfolio)] = 0.0, 0.0
        # Simple return from the previous time point to idt; missing returns count as 0.
        Price = self._FactorTable.readData(factor_names=[self.PriceFactor], dts=[PreDT, idt], ids=self._IDs).iloc[0]
        Return = Price.iloc[1] / Price.iloc[0] - 1
        Return[pd.isnull(Return)] = 0.0
        GroupData = self._FactorTable.readData(factor_names=[self.GroupFactor], ids=self._IDs, dts=[PreDT]).iloc[0, 0, :]
        # Distinct non-null groups, plus None standing for entries without a group.
        AllGroups = pd.unique(GroupData[pd.notnull(GroupData)].values).tolist()
        if GroupData.hasnans: AllGroups.append(None)
        for iGroup in AllGroups:
            if iGroup is None: iMask = pd.isnull(GroupData)
            else: iMask = (GroupData==iGroup)
            # Output columns are keyed by the string form of the group (the missing group becomes "None").
            iGroup = str(iGroup)
            iPortfolio, iBenchmarkPortfolio = Portfolio[iMask], BenchmarkPortfolio[iMask]
            iGroupWeight, iBenchmarkGroupWeight = iPortfolio.sum(), iBenchmarkPortfolio.sum()
            self._Output["策略组合资产权重"].loc[idt, iGroup] = iGroupWeight
            self._Output["基准组合资产权重"].loc[idt, iGroup] = iBenchmarkGroupWeight
            # Weight-averaged group return; 0 when the group carries no weight.
            self._Output["策略组合资产收益"].loc[idt, iGroup] = ((iPortfolio * Return[iMask]).sum() / iGroupWeight if iGroupWeight!=0 else 0.0)
            self._Output["基准组合资产收益"].loc[idt, iGroup] = ((iBenchmarkPortfolio * Return[iMask]).sum() / iBenchmarkGroupWeight if iBenchmarkGroupWeight!=0 else 0.0)
        # Cash weight is 1 minus the sum of every column after the first.
        # NOTE(review): presumably the first column is the cash column itself - confirm the table layout.
        self._Output["策略组合资产权重"].loc[idt, "现金"] = 1 - self._Output["策略组合资产权重"].loc[idt].iloc[1:].sum()
        self._Output["基准组合资产权重"].loc[idt, "现金"] = 1 - self._Output["基准组合资产权重"].loc[idt].iloc[1:].sum()
        return 0 
Example 36
Project: QuantStudio   Author: Scorpi000   File: PortfolioStrategy.py    GNU General Public License v3.0 5 votes vote down vote up
def _genSignalIDs(self, idt, original_ids, signal_type):
        """Run ``original_ids`` through every filtration level configured for ``signal_type``.

        Levels whose ``SignalType`` differs are skipped. Returns the filtered
        ID list, or an empty list when no level applied.
        """
        IDs = original_ids
        # Number of levels that were actually applied.
        FilterLevel = 0
        for i in range(self.FiltrationLevel):
            iArgs = self["第"+str(i)+"层"]
            if iArgs.SignalType!=signal_type: continue
            if iArgs.IDFilter:
                # Restrict to IDs that also pass this level's ID filter expression.
                iIDs = self._FT.getFilteredID(idt, id_filter_str=iArgs.IDFilter)
                IDs = sorted(set(iIDs).intersection(set(IDs)))
            if iArgs.GroupFactors:
                GroupData = self._FT.readData(dts=[idt], ids=IDs, factor_names=list(iArgs.GroupFactors)).iloc[:,0,:]
                # Replace every null value (e.g. None) by NaN so missing groups share one representation.
                if GroupData.shape[0]>0: GroupData[pd.isnull(GroupData)] = np.nan
                # Every combination of the distinct values of the grouping factors.
                AllGroups = [GroupData[iGroup].unique().tolist() for iGroup in iArgs.GroupFactors]
                AllGroups = CartesianProduct(AllGroups)
                IDs = []
                for jGroup in AllGroups:
                    # Select the IDs belonging to this combination; a null sub-group matches null entries.
                    jMask = pd.Series(True, index=GroupData.index)
                    for k, kSubGroup in enumerate(jGroup):
                        if pd.notnull(kSubGroup): jMask = (jMask & (GroupData[iArgs.GroupFactors[k]]==kSubGroup))
                        else: jMask = (jMask & pd.isnull(GroupData[iArgs.GroupFactors[k]]))
                    # Filter inside the group and collect the survivors.
                    jIDs = self._filtrateID(idt, GroupData[jMask].index.tolist(), iArgs)
                    IDs += jIDs
            else:
                IDs = self._filtrateID(idt, IDs, iArgs)
            FilterLevel += 1
        if FilterLevel>0: return IDs
        else: return [] 
Example 37
Project: QuantStudio   Author: Scorpi000   File: DefaultAccount.py    GNU General Public License v3.0 5 votes vote down vote up
def order(self, target_id=None, num=0, target_price=np.nan, combined_order=None):
        """Queue a single order or a batch of orders in ``self._Orders``.

        Parameters
        ----------
        target_id : optional
            ID to trade. When given, one row ``(target_id, num, target_price)``
            is appended and the batch argument is ignored.
        num : optional
            Quantity of the single order.
        target_price : optional
            Limit price of the single order; a non-null value only triggers a
            warning because limit orders are not supported.
        combined_order : DataFrame, optional
            Batch of orders indexed by ID with a "目标价" column. Its index
            is renamed to "ID" in place before it is reset.

        Returns
        -------
        For a single order, the tuple ``(row count after insertion,
        target_id, num, target_price)``. Otherwise the re-indexed batch frame,
        or ``None`` when neither argument was supplied.
        """
        if target_id is not None:
            self._Orders.loc[self._Orders.shape[0]] = (target_id, num, target_price)
            if pd.notnull(target_price): self._QS_Logger.warning("账户: '%s' 不支持限价单, 限价单将自动转为市价单!" % self.Name)
            return (self._Orders.shape[0], target_id, num, target_price)
        if combined_order is not None:
            if pd.notnull(combined_order["目标价"]).sum()>0: self._QS_Logger.warning("本账户: '%s' 不支持限价单, 限价单将自动转为市价单!" % self.Name)
            combined_order.index.name = "ID"
            combined_order = combined_order.reset_index()
            # Continue the running row numbering of the existing order book.
            combined_order.index = np.arange(self._Orders.shape[0], self._Orders.shape[0]+combined_order.shape[0])
            # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent.
            self._Orders = pd.concat([self._Orders, combined_order])
        return combined_order
    # 撤销订单, order_ids 是订单在 self.Orders 中的 index 
Example 38
Project: QuantStudio   Author: Scorpi000   File: Spread.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        """Process time point ``idt``: extend the price history and run pairwise cointegration tests.

        For every pair of IDs selected by the ID filter, the test statistic
        and p-value over the rolling window are stored in square tables under
        ``self._Output["统计量"][idt]`` and ``self._Output["p值"][idt]``.
        Always returns 0.
        """
        # Skip time points that were already handled.
        if self._iDT==idt: return 0
        self._iDT = idt
        Price = self._FactorTable.readData(dts=[idt], ids=self._IDs, factor_names=[self.PriceFactor]).iloc[0, :, :].values
        if self.PriceType=="对数价格":
            # Log prices; infinities (e.g. from log(0)) are treated as missing.
            Price = np.log(Price)
            Price[np.isinf(Price)] = np.nan
        # Append the current price row to the accumulated history.
        self._Output["价格"] = np.r_[self._Output["价格"], Price]
        if self.CalcDTs:
            # Only compute on the configured calculation time points at or after the current position.
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
        # Rolling window start; give up while the window is shorter than the minimum.
        StartInd = int(max(0, self._Output["价格"].shape[0] - self.SummaryWindow))
        if self._Output["价格"].shape[0] - StartInd < self.MinSummaryWindow: return 0
        Price, nID = self._Output["价格"][StartInd:], self._Output["价格"].shape[1]
        # Keep only the columns of IDs passing the ID filter at idt.
        IDMask = self._FactorTable.getIDMask(idt=idt, ids=self._IDs, id_filter_str=self.IDFilter).values
        Price = Price[:, IDMask]
        Mask = pd.notnull(Price)
        Statistics, pValue = np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan), np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan)
        # Test each unordered pair once and mirror the result into both triangles.
        for i in range(Price.shape[1]):
            for j in range(i+1, Price.shape[1]):
                # Use only the rows where both series are observed.
                ijMask = (Mask[:, i] & Mask[:, j])
                try:
                    iRslt = sm.tsa.stattools.coint(Price[:,i][ijMask], Price[:,j][ijMask], **self.CointArgs)
                    Statistics[i, j] = Statistics[j, i] = iRslt[0]
                    pValue[i, j] = pValue[j, i] = iRslt[1]
                # NOTE(review): any failure is silently ignored and the pair stays NaN.
                except:
                    pass
        # Store the results in full-size tables; entries of filtered-out IDs stay empty.
        self._Output["统计量"][idt], self._Output["p值"][idt] = pd.DataFrame(index=self._IDs, columns=self._IDs), pd.DataFrame(index=self._IDs, columns=self._IDs)
        self._Output["统计量"][idt].iloc[IDMask, IDMask] = Statistics
        self._Output["p值"][idt].iloc[IDMask, IDMask] = pValue
        return 0 
Example 39
Project: QuantStudio   Author: Scorpi000   File: Spread.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finish the run: assemble the last-period, full-sample and rolling cointegration results.

        Returns 0 immediately when the module was not started; otherwise
        returns 0 after rewriting the entries of ``self._Output``.
        """
        if not self._isStarted: return 0
        super().__QS_end__()
        # Time points for which a test was stored, in ascending order.
        DTs = sorted(self._Output["统计量"])
        self._Output["最后一期检验"] = {"统计量": self._Output["统计量"][DTs[-1]], "p值": self._Output["p值"][DTs[-1]]}
        Price = self._Output.pop("价格")
        # With an unbounded window, a test at the final time point and no ID filter,
        # the last rolling test is reused as the full-sample test.
        if np.isinf(self.SummaryWindow) and (DTs[-1]==self._iDT) and (not self.IDFilter):
            self._Output["全样本检验"] = deepcopy(self._Output["最后一期检验"])
        else:
            # Otherwise rerun the pairwise tests over the whole price history for all IDs.
            Mask = pd.notnull(Price)
            Statistics, pValue = np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan), np.full(shape=(Price.shape[1], Price.shape[1]), fill_value=np.nan)
            for i in range(Price.shape[1]):
                for j in range(i+1, Price.shape[1]):
                    # Use only the rows where both series are observed.
                    ijMask = (Mask[:, i] & Mask[:, j])
                    try:
                        iRslt = sm.tsa.stattools.coint(Price[:,i][ijMask], Price[:,j][ijMask], **self.CointArgs)
                        Statistics[i, j] = Statistics[j, i] = iRslt[0]
                        pValue[i, j] = pValue[j, i] = iRslt[1]
                    # NOTE(review): any failure is silently ignored and the pair stays NaN.
                    except:
                        pass
            self._Output["全样本检验"] = {"统计量": pd.DataFrame(Statistics, index=self._IDs, columns=self._IDs), "p值": pd.DataFrame(pValue, index=self._IDs, columns=self._IDs)}
        # Flatten the per-time-point tables into long frames.
        # NOTE(review): pd.Panel has been removed from current pandas releases, so this
        # statement needs an older pandas or a port - confirm the supported version.
        self._Output["滚动检验"] = {"统计量": pd.Panel(self._Output.pop("统计量")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index(),
                                    "p值": pd.Panel(self._Output.pop("p值")).loc[DTs].swapaxes(0, 1).to_frame(filter_observations=False).reset_index()}
        # Name the two leading columns produced by reset_index.
        Cols = self._Output["滚动检验"]["统计量"].columns.tolist()
        Cols[0], Cols[1] = "时点", "ID"
        self._Output["滚动检验"]["统计量"].columns = self._Output["滚动检验"]["p值"].columns = Cols
        return 0 
Example 40
Project: QuantStudio   Author: Scorpi000   File: AbnormalReturn.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_move__(self, idt, **kwargs):
        """Process time point ``idt``: register new events and update abnormal returns of open events.

        New events (IDs passing ``self.EventFilter`` at ``idt``) get an
        expected return and variance estimated from the estimation window,
        and their pre-event abnormal returns are filled in. Every event whose
        age is still within the post-event window then receives the abnormal
        return of the current period. Always returns 0.
        """
        # Skip time points that were already handled.
        if self._iDT==idt: return 0
        self._iDT = idt
        CurInd = self._AllDTs.index(idt)
        # Not enough history yet for the pre-event plus estimation windows.
        if CurInd<=self.EventPreWindow+self.EstWindow: return 0
        # Age every recorded event by one period (third column of the event record).
        self._Output["事件记录"][:, 2] += 1
        IDs = self._FactorTable.getFilteredID(idt=idt, id_filter_str=self.EventFilter)
        nID, EventWindow = len(IDs), self.EventPreWindow+1+self.EventPostWindow
        if nID>0:
            # Append one NaN-initialised row per new event to each output array.
            self._Output["事件记录"] = np.r_[self._Output["事件记录"], np.c_[IDs, [idt]*nID, np.zeros(shape=(nID, 1))]]
            self._Output["正常收益率"] = np.r_[self._Output["正常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常收益率"] = np.r_[self._Output["异常收益率"], np.full(shape=(nID, EventWindow), fill_value=np.nan)]
            self._Output["异常协方差"] = np.r_[self._Output["异常协方差"], np.full(shape=(nID, EventWindow, EventWindow), fill_value=np.nan)]
            EstStartInd = CurInd - self.EventPreWindow - self.EstWindow - 1
            Price = self._FactorTable.readData(dts=self._AllDTs[EstStartInd:CurInd+1], ids=IDs, factor_names=[self.PriceFactor]).iloc[0, :, :]
            Return = _calcReturn(Price.values, return_type=self.ReturnType)
            # The first EstWindow rows form the estimation sample.
            EstReturn = Return[:self.EstWindow]
            if self.EstSampleFilter:
                temp = self._FactorTable.readData(dts=self._AllDTs[EstStartInd+1:EstStartInd+self.EstWindow+1], ids=IDs, factor_names=self._FilterFactors)
                # NOTE(review): eval of a stored filter expression (it reads `temp`); make sure
                # the expression can never originate from untrusted input.
                FilterMask = eval(self._CompiledIDFilterStr).values
            else:
                FilterMask = np.full(EstReturn.shape, fill_value=True)
            FilterMask = (FilterMask & pd.notnull(EstReturn))
            # Keep at most the EstSampleLen most recent usable observations per column.
            FilterMask = (FilterMask & (np.flipud(np.cumsum(np.flipud(FilterMask), axis=0))<=self.EstSampleLen))
            EstReturn[~FilterMask] = np.nan
            ExpectedReturn, Var = np.nanmean(EstReturn, axis=0), np.nanvar(EstReturn, axis=0, ddof=1)
            # Discard estimates with too few observations or a near-zero variance.
            FilterMask = ((np.sum(FilterMask, axis=0)<self.EstSampleLen) | (Var<1e-6))
            ExpectedReturn[FilterMask] = np.nan
            Var[FilterMask] = np.nan
            self._Output["正常收益率"][-nID:, :] = ExpectedReturn.reshape((nID, 1)).repeat(EventWindow, axis=1)
            # Abnormal returns of the pre-event window up to and including the event date.
            self._Output["异常收益率"][-nID:, :self.EventPreWindow+1] = (Return[self.EstWindow:] - ExpectedReturn).T
            CovMatrix = (np.eye(EventWindow)+np.ones((EventWindow, EventWindow))/self.EstSampleLen).reshape((1, EventWindow, EventWindow)).repeat(nID, axis=0)
            self._Output["异常协方差"][-nID:, :, :] = (CovMatrix.T*Var).T
        # Events that are still inside their post-event window.
        Mask = (self._Output["事件记录"][:, 2]<=self.EventPostWindow)
        if np.sum(Mask)==0: return 0
        IDs = self._Output["事件记录"][:, 0][Mask]
        # Row = event, column = position inside the event window.  The builtin int replaces
        # the np.int alias, which no longer exists in current NumPy.
        RowPos, ColPos = np.arange(self._Output["异常收益率"].shape[0])[Mask].tolist(), (self._Output["事件记录"][Mask, 2]+self.EventPreWindow).astype(int)
        Price = self._FactorTable.readData(dts=[self._AllDTs[CurInd-1], idt], ids=sorted(set(IDs)), factor_names=[self.PriceFactor]).iloc[0, :, :].loc[:, IDs]
        self._Output["异常收益率"][RowPos, ColPos] = (_calcReturn(Price.values, return_type=self.ReturnType)[0] - self._Output["正常收益率"][RowPos, ColPos])
        return 0
Example 41
Project: QuantStudio   Author: Scorpi000   File: IC.py    GNU General Public License v3.0 5 votes vote down vote up
def __QS_end__(self):
        """Finish the run: turn the collected IC values into a table and summarise them.

        Returns 0 immediately when the module was not started. Otherwise
        ``self._Output["IC"]`` becomes a DataFrame (time points x look-back
        periods, sign-flipped when the factor order is ascending) and
        ``self._Output["统计数据"]`` receives the mean, standard deviation,
        IC_IR, t-statistic and win rate per look-back period.
        """
        if not self._isStarted:
            return 0
        super().__QS_end__()
        ICValues = np.array(self._Output["IC"]).T
        IC = pd.DataFrame(ICValues, index=self._Output.pop("时点"), columns=list(self.LookBack))
        if self.FactorOrder=="升序":
            IC = -IC
        self._Output["IC"] = IC
        Stats = pd.DataFrame(index=IC.columns)
        Stats["IC平均值"] = IC.mean()
        # Number of time points with a valid IC, per look-back period.
        nValid = pd.notnull(IC).sum()
        Stats["标准差"] = IC.std()
        Stats["IC_IR"] = Stats["IC平均值"] / Stats["标准差"]
        Stats["t统计量"] = Stats["IC_IR"] * nValid**0.5
        Stats["胜率"] = (IC>0).sum() / nValid
        self._Output["统计数据"] = Stats
        return 0
Example 42
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 5 votes vote down vote up
def standardizeRank(data, mask=None, cat_data=None, ascending=True, uniformization=True, perturbation=False, offset=0.5, other_handle='填充None'):
    """Rank standardization.

    Within every category produced by ``maskCategary`` the non-null values
    are replaced by their rank.

    Parameters
    ----------
    data : array
        Values to standardize.
    mask : boolean array, optional
        Entries to standardize; defaults to all entries.
    cat_data : array, optional
        Category data handed to ``maskCategary``.
    ascending : bool
        Rank in ascending order when True, descending otherwise.
    uniformization : bool
        When True the ranks are mapped to ``(rank + offset) / count``.
    perturbation : bool
        When True a small random perturbation (1% of the smallest gap between
        distinct values) is added first so that ties are broken randomly.
    offset : float
        Offset used by the uniformization.
    other_handle : str
        "保持不变" keeps the input value for entries that are not ranked;
        any other value leaves them as NaN.

    Returns
    -------
    array with the standardized values.
    """
    if other_handle=="保持不变":
        StdData = np.copy(data)
    else:
        StdData = np.full(data.shape, np.nan, dtype='float')
    if mask is None:
        # Default to "every entry", as the sibling standardization functions do.  Deriving
        # the default from StdData selected only the NaN entries in the "保持不变" case,
        # so nothing was ranked at all.
        mask = np.ones(data.shape, dtype=bool)
    if perturbation:
        UniqueData = data[pd.notnull(data)]
        if UniqueData.shape[0]>0:
            UniqueData = np.sort(pd.unique(UniqueData))
            # With a single distinct value there is no gap to scale the noise by
            # (np.min of an empty diff would raise), so the perturbation is skipped.
            if UniqueData.shape[0]>1:
                MinDiff = np.min(np.abs(np.diff(UniqueData)))
                data = data+np.random.rand(data.shape[0])*MinDiff*0.01
    CatMasks = maskCategary(data.shape[0],cat_data=cat_data,mask=mask)
    for jCat,jCatMask in CatMasks.items():
        jData = data[jCatMask]
        jNotNaMask = pd.notnull(jData)
        # argsort of argsort yields the 0-based rank of every element.
        if ascending:
            jRank = np.argsort(np.argsort(jData[jNotNaMask]))
        else:
            jRank = np.argsort(np.argsort(-jData[jNotNaMask]))
        if uniformization:
            jRank = (jRank.astype('float')+offset)/jRank.shape[0]
        else:
            jRank = jRank.astype('float')
        jData[jNotNaMask] = jRank
        StdData[jCatMask] = jData
    return StdData

# 分位数变换(Quantile Transformation)标准化
# data: 待标准化的数据, array; cat_data: 分类数据, array
# ascending: 是否升序, 可选: True, False 
Example 43
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 5 votes vote down vote up
def standardizeDynamicPeer(data, corr_matrix, mask=None, cat_data=None, n_group=10, other_handle='填充None'):
    """Dynamic peer-group standardization.

    Each selected entry is z-scored against its peers: the (at most
    ``n_group``) entries with the highest positive correlation in the
    matching row of ``corr_matrix``. When fewer than two peers exist or fewer
    than two of them have data, the entries of the same category in
    ``cat_data`` are used instead, or the whole of ``data`` when no category
    data is given. A zero standard deviation yields 0.0.

    Entries not selected by ``mask`` keep their input value when
    ``other_handle`` is "保持不变" and are NaN otherwise.
    """
    if mask is None:
        mask = (np.zeros(data.shape)==0)
    if other_handle=="保持不变":
        result = np.copy(data)
    else:
        result = np.full(data.shape, np.nan, dtype='float')
    for pos in range(data.shape[0]):
        if mask[pos]:
            corr_row = corr_matrix[pos,:]
            n_peers = min((n_group,np.sum(corr_row>0.0)))
            sample = None
            if n_peers>=2:
                # Most correlated entries first.
                sample = data[np.argsort(-corr_row)[:n_peers]]
                if np.sum(pd.notnull(sample))<2:
                    sample = None
            if sample is None:
                # Fall back to the category peers, or to everything.
                if cat_data is None:
                    sample = data
                elif pd.notnull(cat_data[pos]):
                    sample = data[cat_data==cat_data[pos]]
                else:
                    sample = data[pd.isnull(cat_data)]
            spread = np.nanstd(sample)
            centre = np.nanmean(sample)
            result[pos] = 0.0 if spread==0 else (data[pos]-centre)/spread
    return result

# 以之前的值进行缺失值填充
# data: 待填充的数据, array; dts: 时间序列, array; lookback: 如果指定了时间序列 dts 则为回溯的时间, 以秒为单位, 否则为回溯期数 
Example 44
Project: QuantStudio   Author: Scorpi000   File: DataPreprocessingFun.py    GNU General Public License v3.0 5 votes vote down vote up
def fillNaNByRegress(Y, X, mask=None, cat_data=None, constant=False, dummy_data=None, drop_dummy_na=False):
    """Fill the missing entries of ``Y`` with fitted values from an OLS regression.

    The regression is run separately for every category returned by
    ``maskCategary``. Categories without missing values, or with fewer than
    two observations usable for the fit, are left untouched. The regressors
    are built by ``prepareRegressData`` from ``X``, the optional constant and
    the optional dummy data. Returns a filled copy; ``Y`` itself is not
    modified.
    """
    Filled = np.copy(Y)
    if mask is None:
        mask = (np.zeros(Y.shape)==0)
    for _, GroupMask in maskCategary(Y.shape[0], cat_data=cat_data, mask=mask).items():
        GroupY = Y[GroupMask]
        MissingMask = pd.isnull(GroupY)
        if np.sum(MissingMask)==0:
            continue
        GroupX = None if X is None else X[GroupMask]
        GroupDummy = None if dummy_data is None else dummy_data[GroupMask]
        UsableMask, _, _, Design = prepareRegressData(np.ones(GroupY.shape[0]), GroupX, has_constant=constant, dummy_data=GroupDummy, drop_dummy_na=drop_dummy_na)
        Observed = GroupY[UsableMask]
        FitMask = pd.notnull(Observed)
        if np.sum(FitMask)<2:
            continue
        Beta = sm.OLS(Observed[FitMask], Design[FitMask], missing='drop').fit().params
        # Scatter the usable regressor rows back to the category's full length;
        # rows without usable regressors stay NaN and so does their fitted value.
        FullDesign = np.full((GroupY.shape[0], Beta.shape[0]), np.nan)
        FullDesign[UsableMask] = Design
        Fitted = np.sum(FullDesign*Beta, axis=1)
        GroupY[MissingMask] = Fitted[MissingMask]
        Filled[GroupMask] = GroupY
    return Filled
# 异常值处理; 超过给定标准差倍数的值用相应标准差倍数填充
# data: 待处理的数据, array; std_multiplier: 标准差倍数, double
# method: 处理方式, 可选: 截断, 丢弃, 变换; std_tmultiplier: method为变换时所用到的标准差倍数, double 
Example 45
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def calcPortfolioReturn(portfolio, return_rate):
    """Return the weighted return of ``portfolio``.

    ``portfolio`` maps IDs to weights and ``return_rate`` maps IDs to
    returns. An ID whose return is missing or null is charged a return of -1.
    An empty portfolio yields 0.
    """
    total = 0
    for asset in portfolio:
        rate = return_rate.get(asset)
        if pd.isnull(rate):
            rate = -1
        total += portfolio[asset]*rate
    return total
# 计算收益率序列, wealth_seq: 净值序列, array; init_wealth: 初始财富, 若为None使用wealth_seq的第一个元素 
Example 46
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def calcBeta(wealth_seq, market_wealth_seq):
    """Return the beta of a wealth sequence relative to a market wealth sequence.

    Both sequences are converted to yields with ``calcYieldSeq``; only the
    periods where both yields are present are used. Beta is the covariance
    of the two yield series divided by the variance of the market yields.
    """
    YieldSeq = calcYieldSeq(wealth_seq)
    MarketYieldSeq = calcYieldSeq(market_wealth_seq)
    Mask = (pd.notnull(MarketYieldSeq) & pd.notnull(YieldSeq))
    # Take covariance and variance from the same covariance matrix so that both use the
    # same observations and the same normalisation.  Dividing np.cov (ddof=1, masked
    # sample) by np.nanvar (ddof=0, all market observations) inflated beta by n/(n-1)
    # and mixed two different samples.
    CovMatrix = np.cov(YieldSeq[Mask], MarketYieldSeq[Mask])
    return CovMatrix[0, 1] / CovMatrix[1, 1]
# 计算收益率的 Lower Partial Moment, wealth_seq: 净值序列, array 
Example 47
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def calcHPM(wealth_seq, threshold=0.0, order=2):
    """Return the higher partial moment of the yields of ``wealth_seq``.

    The part of every yield that exceeds ``threshold`` (0 when the yield is
    below it) is raised to ``order``; the NaN-ignoring sum is divided by the
    number of non-null yields.
    """
    yields = calcYieldSeq(wealth_seq)
    # One threshold value per yield observation.
    floor = np.full(yields.shape[0], threshold, dtype=float)
    # Excess over the threshold, with shortfalls clipped to zero.
    excess = (yields - floor).clip(min=0)
    n_valid = np.sum(pd.notnull(yields))
    return np.nansum(excess ** order) / n_valid
# 计算夏普比率, wealth_seq: 净值序列, array 
Example 48
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def summaryStrategy(wealth_seq, dts, dt_ruler=None, init_wealth=None):
    """Build a summary table of performance statistics for one or more wealth sequences.

    ``wealth_seq`` may be one- or two-dimensional; a one-dimensional input is
    treated as a single column. The sequence is first passed through
    ``_densifyWealthSeq`` together with ``dts`` and ``dt_ruler``. When
    ``init_wealth`` is None the first row of the wealth sequence is used for
    the total return. Returns a DataFrame with one row per statistic and one
    column per wealth sequence.
    """
    nCol = wealth_seq.shape[1] if wealth_seq.ndim>1 else 1
    if nCol==1:
        wealth_seq = wealth_seq.reshape((wealth_seq.shape[0], 1))
    wealth_seq, dts = _densifyWealthSeq(wealth_seq, dts, dt_ruler)
    yield_seq = calcYieldSeq(wealth_seq, init_wealth)
    if init_wealth is None:
        init_wealth = wealth_seq[0]
    first_dt, last_dt = dts[0], dts[-1]
    total_return = wealth_seq[-1] / init_wealth - 1
    annual_yield = calcAnnualYield(wealth_seq, start_dt=first_dt, end_dt=last_dt)
    annual_vol = calcAnnualVolatility(wealth_seq, start_dt=first_dt, end_dt=last_dt)
    # Share of periods with a non-negative yield among the periods with a yield.
    win_rate = np.sum(yield_seq>=0, axis=0) / np.sum(pd.notnull(yield_seq), axis=0)
    dd_rates, dd_starts, dd_ends = [], [], []
    for col in range(nCol):
        rate, start_pos, end_pos = calcMaxDrawdownRate(wealth_seq=wealth_seq[:, col])
        dd_rates.append(np.abs(rate))
        dd_starts.append(None if start_pos is None else dts[start_pos])
        dd_ends.append(None if end_pos is None else dts[end_pos])
    rows = [
        ('起始时点', np.array([first_dt]*nCol)),
        ('结束时点', np.array([last_dt]*nCol)),
        ('时点数', np.zeros(nCol) + len(dts)),
        ('总收益率', total_return),
        ('年化收益率', annual_yield),
        ('年化波动率', annual_vol),
        ('Sharpe比率', annual_yield / annual_vol),
        ('胜率', win_rate),
        ("最大回撤率", np.array(dd_rates)),
        ("最大回撤开始时点", np.array(dd_starts)),
        ("最大回撤结束时点", np.array(dd_ends)),
    ]
    return pd.DataFrame([value for _, value in rows], index=[label for label, _ in rows])
# 计算每年的收益率, wealth_seq: 净值序列, dts: 时间序列, dt_ruler: 时间标尺 
Example 49
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def loadCSVFilePortfolioSignal(csv_path):
    """Load portfolio signals from a CSV file into a dict keyed by date string.

    The layout is detected from the first line. With exactly three
    comma-separated fields the file is read in the vertical layout: the first
    column holds the date, the second becomes the index and the third the
    values. Otherwise the horizontal layout is assumed: columns come in
    pairs whose first header is the date; rows with a null second column are
    dropped and the first column becomes the index of the second.

    Raises ``__QS_Error__`` when ``csv_path`` is not an existing file.
    """
    if not os.path.isfile(csv_path):
        raise __QS_Error__("文件: '%s' 不存在" % csv_path)
    with open(csv_path) as CSVFile:
        nHeaderField = len(CSVFile.readline().split(","))
    Signals = {}
    if nHeaderField==3:
        # Vertical layout.
        Table = readCSV2Pandas(csv_path,detect_file_encoding=True,header=0)
        Columns = list(Table.columns)
        for DT in pd.unique(Table.iloc[:,0]):
            Rows = Table.iloc[:, 1:][Table.iloc[:,0]==DT]
            Signals[str(DT)] = Rows.set_index(Columns[1:2])[Columns[2]]
        return Signals
    # Horizontal layout.
    Table = readCSV2Pandas(csv_path,detect_file_encoding=True)
    Names = list(Table.columns)
    nCol = len(Names)
    DateKeys = [str(int(Names[k])) for k in range(0,nCol,2)]
    for k in range(int(nCol/2)):
        Pair = Table.iloc[:,k*2:k*2+2]
        Pair = Pair[pd.notnull(Pair.iloc[:,1])]
        Signals[DateKeys[k]] = Pair.set_index([Table.columns[k*2]]).iloc[:,0]
    return Signals
# 将投资组合信号写入CSV文件 
Example 50
Project: QuantStudio   Author: Scorpi000   File: StrategyTestFun.py    GNU General Public License v3.0 5 votes vote down vote up
def genRandomPortfolio(ids, target_num=20, weight=None):
    """Draw ``target_num`` distinct IDs at random and return their portfolio weights.

    Without ``weight`` the sorted IDs are equally weighted. Otherwise the
    weights of the drawn IDs are looked up in ``weight``, null and zero
    entries are dropped, and the rest is normalised to sum to 1.
    """
    picked = np.sort(np.random.choice(np.array(ids),target_num,replace=False))
    if weight is not None:
        chosen = weight[picked]
        usable = chosen[(chosen!=0) & pd.notnull(chosen)]
        return usable/usable.sum()
    return pd.Series(1/picked.shape[0],index=picked)
# 以筛选的方式形成投资组合