Java Code Examples for org.apache.mahout.math.Vector#nonZeroes()

The following examples show how to use org.apache.mahout.math.Vector#nonZeroes(). They are drawn from open-source projects; the source file and project named above each example indicate where the code originated.
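Before the project-specific examples, here is a minimal, self-contained sketch of the API itself (assuming Mahout 0.9 or later, where nonZeroes() replaced the older iterateNonZero()). nonZeroes() returns an Iterable<Vector.Element> over only the stored non-zero entries, so iterating it costs time proportional to the number of non-zeros rather than to the vector's cardinality.

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class NonZeroesDemo {
    public static void main(String[] args) {
        // a sparse vector of cardinality 10 with three stored entries
        Vector v = new RandomAccessSparseVector(10);
        v.set(1, 2.0);
        v.set(4, -1.5);
        v.set(7, 3.0);

        double sum = 0.0;
        for (Vector.Element e : v.nonZeroes()) {
            // e.index() is the position, e.get() the value
            sum += e.get();
            System.out.println(e.index() + " -> " + e.get());
        }
        System.out.println("sum of non-zeros = " + sum);
    }
}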
Example 1
Source File: LogisticL2DiffFunction.java    From laser with Apache License 2.0
public double evaluatePrimalObjective(double[] x) {
	double result = 0.0;
	for (int row = 0; row < this.m; row++) {
		Vector v = this.a[row];
		double ax = 0;
		for (Element e : v.nonZeroes()) {
			// calculate the dot product ai' * x, where ai denotes the i-th row of a
			ax += e.get() * x[e.index()];
		}
		double axb = ax * b[row];
		double thisLoopResult = Math.log(1.0 + Math.exp(-axb));
		result += thisLoopResult;
	}
	result /= m;
	return result;
}
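A caveat worth noting: Math.exp(-axb) overflows for large negative axb, driving the loss to infinity. A standard numerically stable rewrite of log(1 + exp(-z)) (a suggested refinement, not part of the laser source) keeps the exponent non-positive:

// stable log(1 + exp(-z)): shift so the argument of exp() is never positive
private static double logOnePlusExpNeg(double z) {
    return Math.max(-z, 0.0) + Math.log1p(Math.exp(-Math.abs(z)));
}

With this helper, the accumulation above becomes result += logOnePlusExpNeg(axb);.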
 
Example 2
Source File: CRFF1Loss.java    From pyramid with Apache License 2.0
private double calEmpiricalCountForFeature(int parameterIndex) {
    double empiricalCount = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex==-1){
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            if (dataSet.getMultiLabels()[i].matchClass(classIndex)) {
                empiricalCount += 1;
            }
        }
    } else{
        Vector column = dataSet.getColumn(featureIndex);
        MultiLabel[] multiLabels = dataSet.getMultiLabels();
        for (Vector.Element element: column.nonZeroes()){
            int dataIndex = element.index();
            double featureValue = element.get();
            if (multiLabels[dataIndex].matchClass(classIndex)){
                empiricalCount += featureValue;
            }
        }
    }
    return empiricalCount;
}
 
Example 3
Source File: CMLCRFElasticNet.java    From pyramid with Apache License 2.0
private double calPredictedFeatureCounts(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<numData; i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 4
Source File: ElasticNetLogisticTrainer.java    From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex){
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += targets[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += featureValue * targets[dataPointIndex][classIndex];
        }
    }
    return count;
}
 
Example 5
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
public static void normalize(DataSet dataSet, double[] normalizationConstants){
    for (int j=0;j<dataSet.getNumFeatures();j++){
        Vector column = dataSet.getColumn(j);
        List<Integer> indices = new ArrayList<>();
        List<Double> values = new ArrayList<>();
        for (Vector.Element nonzero: column.nonZeroes()){
            indices.add(nonzero.index());
            values.add(nonzero.get());
        }

        for (int i=0;i<indices.size();i++){
            int dataId = indices.get(i);
            double old = values.get(i);
            // divide by the normalization constant; if the constant is 0, use 0 as the normalized value
            dataSet.setFeatureValue(dataId, j, SafeDivide.divide(old, normalizationConstants[j], 0.0));
        }
    }
}
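The normalization constants are computed by the caller. As a purely illustrative companion (an assumption, not pyramid's code), per-column constants such as the maximum absolute value can be built with the same nonZeroes() traversal:

// hypothetical helper: max-absolute-value constant per feature column
public static double[] maxAbsConstants(DataSet dataSet) {
    double[] constants = new double[dataSet.getNumFeatures()];
    for (int j = 0; j < dataSet.getNumFeatures(); j++) {
        for (Vector.Element e : dataSet.getColumn(j).nonZeroes()) {
            constants[j] = Math.max(constants[j], Math.abs(e.get()));
        }
    }
    return constants;
}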
 
Example 6
Source File: MLLogisticLoss.java    From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex){
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 7
Source File: MLLogisticLoss.java    From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex){
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    MultiLabel[] labels = dataSet.getMultiLabels();
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            if (labels[i].matchClass(classIndex)){
                count +=1;
            }
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            MultiLabel label = labels[dataPointIndex];
            if (label.matchClass(classIndex)){
                count += featureValue;
            }
        }
    }
    return count;
}
 
Example 8
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
public static Pair<DataSet, double[][]> sampleData(DataSet dataSet, double[][] targetDistribution, List<Integer> indices){
    DataSet sample;
    int numClasses = targetDistribution[0].length;
    double[][] sampledTargets = new double[indices.size()][numClasses];
    sample = DataSetBuilder.getBuilder().dense(dataSet.isDense()).missingValue(dataSet.hasMissingValue())
            .numDataPoints(indices.size()).numFeatures(dataSet.getNumFeatures()).build();

    for (int i=0;i<indices.size();i++){
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        double[] targets = targetDistribution[indexInOld];
        //copy label
        sampledTargets[i] = Arrays.copyOf(targets,targets.length);
        //copy row feature values, optimized for sparse vector
        for (Vector.Element element: oldVector.nonZeroes()){
            sample.setFeatureValue(i,element.index(),element.get());
        }

    }

    sample.setFeatureList(dataSet.getFeatureList());

    //ignore idTranslator as we may have duplicate extIds
    return new Pair<>(sample, sampledTargets);
}
 
Example 9
Source File: BlockwiseCD.java    From pyramid with Apache License 2.0
private double calHessiansForFeature(int l, int m) {
    double count = 0.0;
    if (m == -1) {
        // bias column: each term is p^2 - p = -p(1-p)
        for (int i=0; i<numData; i++) {
            count += (Math.pow(this.classProbMatrix[i][l],2) - this.classProbMatrix[i][l]);
        }
    } else {
        Vector featureColumn = dataSet.getColumn(m);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            // (p*x)^2 - p*x^2 = -p(1-p)*x^2 for each non-zero feature value
            count += (Math.pow(this.classProbMatrix[dataPointIndex][l]*featureValue, 2) -
                    this.classProbMatrix[dataPointIndex][l] * Math.pow(featureValue,2));
        }
    }
    return count;
}
 
Example 10
Source File: IMLLogisticLoss.java    From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex){
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    //bias
    if (featureIndex == -1){
        for (int i=0;i<dataSet.getNumDataPoints();i++){
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()){
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
 
Example 11
Source File: CRFLoss.java    From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }

    count -= this.empiricalCounts[parameterIndex];

    // regularize
    if (regularizeAll){
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
        }
    }
    return count;
}
 
Example 12
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
/**
 * Creates a subset of the data set containing the rows at the given indices.
 * Duplicate indices are allowed.
 * The idTranslator is not copied into the sample, since duplicates may share extIds.
 * @param dataSet the data set to sample from
 * @param indices the row indices to keep (duplicates allowed)
 * @return the sampled data set
 */
public static ClfDataSet sampleData(ClfDataSet dataSet, List<Integer> indices){
    ClfDataSet sample;
    int numClasses = dataSet.getNumClasses();
    boolean missingValue = dataSet.hasMissingValue();
    if (dataSet instanceof DenseClfDataSet){
        sample = new DenseClfDataSet(indices.size(),dataSet.getNumFeatures(), missingValue, numClasses);
    } else {
        sample = new SparseClfDataSet(indices.size(),dataSet.getNumFeatures(), missingValue, numClasses);
    }
    int[] labels = dataSet.getLabels();
    for (int i=0;i<indices.size();i++){
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        int label = labels[indexInOld];
        //copy label
        sample.setLabel(i,label);
        //copy row feature values, optimized for sparse vector
        for (Vector.Element element: oldVector.nonZeroes()){
            sample.setFeatureValue(i,element.index(),element.get());
        }

    }

    sample.setLabelTranslator(dataSet.getLabelTranslator());
    sample.setFeatureList(dataSet.getFeatureList());

    //ignore idTranslator as we may have duplicate extIds
    return sample;
}
 
Example 13
Source File: CRFF1Loss.java    From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];

    if (featureIndex == -1) {
        for (int i=0; i<dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element: featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }

    count -= this.empiricalCounts[parameterIndex];

    // regularize
    if (regularizeAll){
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex)/gaussianPriorVariance;
        }
    }
    return count;
}
 
Example 14
Source File: AbstractRobustCBMOptimizer.java    From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex){
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element: labelColumn.nonZeroes()){
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex] * noiseLabelWeights[dataIndex][labelIndex];
    }
    return sum;
}
 
Example 15
Source File: DataSetUtil.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Sets every non-zero feature value to 1, in place.
 * @param dataSet the data set to binarize
 */
public static void binarizeFeature(DataSet dataSet){
    for (int i=0;i<dataSet.getNumDataPoints();i++){
        // collect the non-zero indices first, then write, so the row is not
        // modified while its non-zero elements are being iterated
        List<Integer> nonZeros = new ArrayList<>();
        Vector row = dataSet.getRow(i);
        for (Vector.Element element: row.nonZeroes()){
            nonZeros.add(element.index());
        }
        for (int j:nonZeros){
            dataSet.setFeatureValue(i,j,1);
        }
    }
}
 
Example 16
Source File: AugmentedLRLoss.java    From pyramid with Apache License 2.0
private double calPredictedCountFeatureWeight(int d){
    Vector featureColumn = dataSet.getColumn(d);
    double sum = 0;
    for (Vector.Element element: featureColumn.nonZeroes()){
        int dataIndex = element.index();
        double feature = element.get();
        sum += feature* expectedProbs[dataIndex];
    }
    return sum;
}
 
Example 17
Source File: MultiLabel.java    From pyramid with Apache License 2.0
/**
 * Builds a MultiLabel from a binary label vector: every non-zero index becomes a matched label.
 * @param vector a binary label vector
 */
public MultiLabel(Vector vector){
    this();
    for (Vector.Element element:vector.nonZeroes()){
        this.addLabel(element.index());
    }
}
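A brief usage sketch (hypothetical, assuming mahout-math's SequentialAccessSparseVector is on the classpath): encode labels {0, 3} out of five classes as a 0/1 vector and recover them through this constructor.

// hypothetical usage of the MultiLabel(Vector) constructor
Vector labelVector = new SequentialAccessSparseVector(5);
labelVector.set(0, 1.0);
labelVector.set(3, 1.0);
MultiLabel multiLabel = new MultiLabel(labelVector); // now contains labels 0 and 3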
 
Example 18
Source File: BMTrainer.java    From pyramid with Apache License 2.0
private double weightedSum(int clusterIndex, int dimensionIndex){
    Vector column = dataSet.getColumn(dimensionIndex);
    double sum = 0;
    for (Vector.Element nonzero: column.nonZeroes()){
        int i = nonzero.index();
        sum += gammas[i][clusterIndex];
    }
    return sum;
}
 
Example 19
Source File: AbstractRecoverCBMOptimizer.java    From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex){
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element: labelColumn.nonZeroes()){
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex];
    }
    return sum;
}
 
Example 20
Source File: DataSetUtil.java    From pyramid with Apache License 2.0
/**
 * Keeps only the selected feature columns.
 * @param dataSet the data set to trim
 * @param columnsToKeep the indices of the feature columns to keep
 * @return a new data set containing only the selected features
 */
public static MultiLabelClfDataSet sampleFeatures(MultiLabelClfDataSet dataSet, List<Integer> columnsToKeep){
    MultiLabelClfDataSet trimmed;
    boolean missingValue = dataSet.hasMissingValue();
    int numClasses = dataSet.getNumClasses();
    // keep density
    if (dataSet.isDense()) {
        trimmed = new DenseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses);
    } else{
        trimmed = new SparseMLClfDataSet(dataSet.getNumDataPoints(),columnsToKeep.size(), missingValue, numClasses);
    }


    for (int j=0;j<trimmed.getNumFeatures();j++){
        int oldColumnIndex = columnsToKeep.get(j);
        Vector vector = dataSet.getColumn(oldColumnIndex);
        for (Vector.Element element: vector.nonZeroes()){
            int dataPointIndex = element.index();
            double value = element.get();
            trimmed.setFeatureValue(dataPointIndex,j,value);
        }
    }
    //copy labels
    MultiLabel[] multiLabels = dataSet.getMultiLabels();

    for (int i=0;i<trimmed.getNumDataPoints();i++){
        trimmed.addLabels(i,multiLabels[i].getMatchedLabels());
    }
    //copy the remaining settings
    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());
    List<Feature> oldFeatures = dataSet.getFeatureList().getAll();
    List<Feature> newFeatures = columnsToKeep.stream().map(oldFeatures::get).collect(Collectors.toList());
    for (int i=0;i<newFeatures.size();i++){
        newFeatures.get(i).setIndex(i);
    }
    trimmed.setFeatureList(new FeatureList(newFeatures));

    return trimmed;
}