Java Code Examples for org.apache.mahout.math.Vector#nonZeroes()

The following examples show how to use org.apache.mahout.math.Vector#nonZeroes(). They are drawn from open-source projects; the source file and project named above each example indicate where the code originated.
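Before the project-specific examples, here is a minimal, self-contained sketch of the API itself (assuming Mahout 0.9 or later, where nonZeroes() replaced the older iterateNonZero()). nonZeroes() returns an Iterable<Vector.Element> over only the stored non-zero entries, so iterating it costs time proportional to the number of non-zeros rather than to the vector's cardinality.

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class NonZeroesDemo {
    public static void main(String[] args) {
        // a sparse vector of cardinality 10 with three stored entries
        Vector v = new RandomAccessSparseVector(10);
        v.set(1, 2.0);
        v.set(4, -1.5);
        v.set(7, 3.0);

        double sum = 0.0;
        for (Vector.Element e : v.nonZeroes()) {
            // e.index() is the position, e.get() the value
            sum += e.get();
            System.out.println(e.index() + " -> " + e.get());
        }
        System.out.println("sum of non-zeros = " + sum);
    }
}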
Example 1
Source File: LogisticL2DiffFunction.java    From laser with Apache License 2.0
public double evaluatePrimalObjective(double[] x) {
	double result = 0.0;
	for (int row = 0; row < this.m; row++) {
		Vector v = this.a[row];
		double ax = 0;
		for (Element e : v.nonZeroes()) {
			// calculate the dot product ai' * x, where ai denotes the i-th row of a
			ax += e.get() * x[e.index()];
		}
		double axb = ax * b[row];
		double thisLoopResult = Math.log(1.0 + Math.exp(-axb));
		result += thisLoopResult;
	}
	result /= m;
	return result;
}
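A caveat worth noting: Math.exp(-axb) overflows for large negative axb, driving the loss to infinity. A standard numerically stable rewrite of log(1 + exp(-z)) (a suggested refinement, not part of the laser source) keeps the exponent non-positive:

// stable log(1 + exp(-z)): shift so the argument of exp() is never positive
private static double logOnePlusExpNeg(double z) {
    return Math.max(-z, 0.0) + Math.log1p(Math.exp(-Math.abs(z)));
}

With this helper, the accumulation above becomes result += logOnePlusExpNeg(axb);.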
 
Example 2
Source File: CRFF1Loss.java    From pyramid with Apache License 2.0
private double calEmpiricalCountForFeature(int parameterIndex) {
    double empiricalCount = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex==-1){
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            if (dataSet.getMultiLabels()[i].matchClass(classIndex)) {
                empiricalCount += 1;
            }
        }
    } else{
        Vector column = dataSet.getColumn(featureIndex);
        MultiLabel[] multiLabels = dataSet.getMultiLabels();
        for (Vector.Element element: column.nonZeroes()){
            int dataIndex = element.index();
            double featureValue = element.get();
            if (multiLabels[dataIndex].matchClass(classIndex)){
                empiricalCount += featureValue;
            }
        }
    }
    return empiricalCount;
}
 
Example 3
Source File: CMLCRFElasticNet.java    From pyramid with Apache License 2.0
private double calPredictedFeatureCounts(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<numData; i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 4
Source File: ElasticNetLogisticTrainer.java    From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex){
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += targets[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += featureValue * targets[dataPointIndex][classIndex];
        }
    }
    return count;
}
 
Example 5
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
public static void normalize(DataSet dataSet, double[] normalizationConstants){
    for (int j=0;j<dataSet.getNumFeatures();j++){
        Vector column = dataSet.getColumn(j);
        List<Integer> indices = new ArrayList<>();
        List<Double> values = new ArrayList<>();
        for (Vector.Element nonzero: column.nonZeroes()){
            indices.add(nonzero.index());
            values.add(nonzero.get());
        }

        for (int i=0;i<indices.size();i++){
            int dataId = indices.get(i);
            double old = values.get(i);
            // divide by the normalization constant; if the constant is 0, use 0 as the normalized value
            dataSet.setFeatureValue(dataId, j, SafeDivide.divide(old, normalizationConstants[j], 0.0));
        }
    }
}
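The normalization constants are computed by the caller. As a purely illustrative companion (an assumption, not pyramid's code), per-column constants such as the maximum absolute value can be built with the same nonZeroes() traversal:

// hypothetical helper: max-absolute-value constant per feature column
public static double[] maxAbsConstants(DataSet dataSet) {
    double[] constants = new double[dataSet.getNumFeatures()];
    for (int j = 0; j < dataSet.getNumFeatures(); j++) {
        for (Vector.Element e : dataSet.getColumn(j).nonZeroes()) {
            constants[j] = Math.max(constants[j], Math.abs(e.get()));
        }
    }
    return constants;
}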
 
Example 6
Source File: MLLogisticLoss.java    From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex){
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 7
Source File: MLLogisticLoss.java    From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex){
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    MultiLabel[] labels = dataSet.getMultiLabels();
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            if (labels[i].matchClass(classIndex)){
                count +=1;
            }
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            MultiLabel label = labels[dataPointIndex];
            if (label.matchClass(classIndex)){
                count += featureValue;
            }
        }
    }
    return count;
}
 
Example 8
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
public static Pair<DataSet, double[][]> sampleData(DataSet dataSet, double[][] targetDistribution, List<Integer> indices){
    DataSet sample;
    int numClasses = targetDistribution[0].length;
    double[][] sampledTargets = new double[indices.size()][numClasses];
    sample = DataSetBuilder.getBuilder().dense(dataSet.isDense()).missingValue(dataSet.hasMissingValue())
            .numDataPoints(indices.size()).numFeatures(dataSet.getNumFeatures()).build();

    for (int i=0;i<indices.size();i++){
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        double[] targets = targetDistribution[indexInOld];
        //copy label
        sampledTargets[i] = Arrays.copyOf(targets,targets.length);
        //copy row feature values, optimized for sparse vector
        for (Vector.Element element: oldVector.nonZeroes()){
            sample.setFeatureValue(i,element.index(),element.get());
        }

    }

    sample.setFeatureList(dataSet.getFeatureList());

    //ignore idTranslator as we may have duplicate extIds
    return new Pair<>(sample, sampledTargets);
}
 
Example 9
Source File: BlockwiseCD.java    From pyramid with Apache License 2.0
private double calHessiansForFeature(int l, int m) {
    double count = 0.0;
    if (m == -1) {
        // bias column: each term is p^2 - p = -p(1-p)
        for (int i=0; i<numData; i++) {
            count += (Math.pow(this.classProbMatrix[i][l],2) - this.classProbMatrix[i][l]);
        }
    } else {
        Vector featureColumn = dataSet.getColumn(m);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            // (p*x)^2 - p*x^2 = -p(1-p)*x^2 for each non-zero feature value
            count += (Math.pow(this.classProbMatrix[dataPointIndex][l]*featureValue, 2) -
                    this.classProbMatrix[dataPointIndex][l] * Math.pow(featureValue,2));
        }
    }
    return count;
}
 
Example 10
Source File: IMLLogisticLoss.java    From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex){
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 11
Source File: CRFLoss.java    From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }

    count -= this.empiricalCounts[parameterIndex];

    // regularize
    if (regularizeAll){
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
        }
    }
    return count;
}
 
Example 12
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
/**
 * Creates a subset of the data set containing the rows at the given indices.
 * Duplicate indices are allowed.
 * The idTranslator is not copied into the sample, since duplicates may share extIds.
 * @param dataSet the data set to sample from
 * @param indices the row indices to keep (duplicates allowed)
 * @return the sampled data set
 */
public static ClfDataSet sampleData(ClfDataSet dataSet, List<Integer> indices){
    ClfDataSet sample;
    int numClasses = dataSet.getNumClasses();
    boolean missingValue = dataSet.hasMissingValue();
    if (dataSet instanceof DenseClfDataSet){
        sample = new DenseClfDataSet(indices.size(),dataSet.getNumFeatures(), missingValue, numClasses);
    } else {
        sample = new SparseClfDataSet(indices.size(),dataSet.getNumFeatures(), missingValue, numClasses);
    }
    int[] labels = dataSet.getLabels();
    for (int i=0;i<indices.size();i++){
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        int label = labels[indexInOld];
        //copy label
        sample.setLabel(i,label);
        //copy row feature values, optimized for sparse vector
        for (Vector.Element element: oldVector.nonZeroes()){
            sample.setFeatureValue(i,element.index(),element.get());
        }

    }

    sample.setLabelTranslator(dataSet.getLabelTranslator());
    sample.setFeatureList(dataSet.getFeatureList());

    //ignore idTranslator as we may have duplicate extIds
    return sample;
}
 
Example 13
Source File: CRFF1Loss.java    From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }

    count -= this.empiricalCounts[parameterIndex];

    // regularize
    if (regularizeAll){
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
        }
    }
    return count;
}
 
Example 14
Source File: AbstractRobustCBMOptimizer.java    From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex){
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element: labelColumn.nonZeroes()){
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex] * noiseLabelWeights[dataIndex][labelIndex];
    }
    return sum;
}
 
Example 15
Source File: DataSetUtil.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Sets every non-zero feature value to 1, in place.
 * @param dataSet the data set to binarize
 */
public static void binarizeFeature(DataSet dataSet){
    for (int i=0;i<dataSet.getNumDataPoints();i++){
        // collect the non-zero indices first, then write, so the row is not
        // modified while its non-zero elements are being iterated
        List<Integer> nonZeros = new ArrayList<>();
        Vector row = dataSet.getRow(i);
        for (Vector.Element element: row.nonZeroes()){
            nonZeros.add(element.index());
        }
        for (int j:nonZeros){
            dataSet.setFeatureValue(i,j,1);
        }
    }
}
 
Example 16
Source File: AugmentedLRLoss.java    From pyramid with Apache License 2.0
private double calPredictedCountFeatureWeight(int d){
    Vector featureColumn = dataSet.getColumn(d);
    double sum = 0;
    for (Vector.Element element: featureColumn.nonZeroes()){
        int dataIndex = element.index();
        double feature = element.get();
        sum += feature* expectedProbs[dataIndex];
    }
    return sum;
}
 
Example 17
Source File: MultiLabel.java    From pyramid with Apache License 2.0
/**
 * Builds a MultiLabel from a binary label vector: every non-zero index becomes a matched label.
 * @param vector a binary label vector
 */
public MultiLabel(Vector vector){
    this();
    for (Vector.Element element:vector.nonZeroes()){
        this.addLabel(element.index());
    }
}
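A brief usage sketch (hypothetical, assuming mahout-math's SequentialAccessSparseVector is on the classpath): encode labels {0, 3} out of five classes as a 0/1 vector and recover them through this constructor.

// hypothetical usage of the MultiLabel(Vector) constructor
Vector labelVector = new SequentialAccessSparseVector(5);
labelVector.set(0, 1.0);
labelVector.set(3, 1.0);
MultiLabel multiLabel = new MultiLabel(labelVector); // now contains labels 0 and 3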
 
Example 18
Source File: BMTrainer.java    From pyramid with Apache License 2.0
private double weightedSum(int clusterIndex, int dimensionIndex){
    Vector column = dataSet.getColumn(dimensionIndex);
    double sum = 0;
    for (Vector.Element nonzero: column.nonZeroes()){
        int i = nonzero.index();
        sum += gammas[i][clusterIndex];
    }
    return sum;
}
 
Example 19
Source File: AbstractRecoverCBMOptimizer.java    From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex){
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element: labelColumn.nonZeroes()){
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex];
    }
    return sum;
}
 
Example 20
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
/**
 * Keeps only the selected feature columns.
 * @param dataSet the data set to trim
 * @param columnsToKeep the indices of the feature columns to keep
 * @return a new data set containing only the selected features
 */
public static MultiLabelClfDataSet sampleFeatures(MultiLabelClfDataSet dataSet, List<Integer> columnsToKeep){
    MultiLabelClfDataSet trimmed;
    boolean missingValue = dataSet.hasMissingValue();
    int numClasses = dataSet.getNumClasses();
    // keep density
    if (dataSet.isDense()) {
        trimmed = new DenseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses);
    } else{
        trimmed = new SparseMLClfDataSet(dataSet.getNumDataPoints(),columnsToKeep.size(), missingValue, numClasses);
    }


    for (int j=0;j<trimmed.getNumFeatures();j++){
        int oldColumnIndex = columnsToKeep.get(j);
        Vector vector = dataSet.getColumn(oldColumnIndex);
        for (Vector.Element element: vector.nonZeroes()){
            int dataPointIndex = element.index();
            double value = element.get();
            trimmed.setFeatureValue(dataPointIndex,j,value);
        }
    }
    //copy labels
    MultiLabel[] multiLabels = dataSet.getMultiLabels();

    for (int i=0;i<trimmed.getNumDataPoints();i++){
        trimmed.addLabels(i,multiLabels[i].getMatchedLabels());
    }
    //copy the remaining settings
    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());
    List<Feature> oldFeatures = dataSet.getFeatureList().getAll();
    List<Feature> newFeatures = columnsToKeep.stream().map(oldFeatures::get).collect(Collectors.toList());
    for (int i=0;i<newFeatures.size();i++){
        newFeatures.get(i).setIndex(i);
    }
    trimmed.setFeatureList(new FeatureList(newFeatures));

    return trimmed;
}