org.apache.mahout.math.Vector Java Examples

The following examples show how to use org.apache.mahout.math.Vector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: Reranker.java From pyramid with Apache License 2.0

6 votes

/**
 * Scores the given label set for an instance and clamps the result to [0, 1].
 *
 * The calibrated class probabilities and the top {@code numCandidate} label sets
 * produced by dynamic programming are packed into a {@link PredictionCandidate};
 * features extracted from it are passed to the regressor.
 *
 * @param vector     the instance
 * @param multiLabel the label set being scored
 * @return the regressor output, limited to the range [0, 1]
 */
public double prob(Vector vector, MultiLabel multiLabel){
    double[] calibratedMarginals =
            labelCalibrator.calibratedClassProbs(classProbEstimator.predictClassProbs(vector));
    List<Pair<MultiLabel,Double>> bestJoint = new DynamicProgramming(calibratedMarginals).topK(numCandidate);

    PredictionCandidate candidate = new PredictionCandidate();
    candidate.x = vector;
    candidate.labelProbs = calibratedMarginals;
    candidate.multiLabel = multiLabel;
    candidate.sparseJoint = bestJoint;

    double rawScore = regressor.predict(predictionFeatureExtractor.extractFeatures(candidate));
    // Clamp: values above 1 become 1, values below 0 become 0, everything else passes through.
    return rawScore > 1 ? 1 : (rawScore < 0 ? 0 : rawScore);
}

Example #2

Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0

6 votes

/**
 * Generates split intervals for a 4-element column that contains a NaN at
 * index 2 and prints the intervals, their compressed form, and two
 * hand-computed reference values.
 */
static void test9(){
    RegTreeConfig config = new RegTreeConfig().setNumSplitIntervals(2);

    double[] columnValues = {0, 1, Double.NaN, 3};
    Vector column = new DenseVector(columnValues.length);
    for (int idx = 0; idx < columnValues.length; idx++){
        column.set(idx, columnValues[idx]);
    }

    double[] probs = {1,0.5,1,0.6};
    double[] labels = {1,2,3,4};
    Splitter.GlobalStats stats = new Splitter.GlobalStats(labels,probs);

    List<Interval> generated = IntervalSplitter.generateIntervals(config, column, probs, labels, stats);
    System.out.println(generated);
    System.out.println(IntervalSplitter.compress(generated));

    // Reference numbers printed for manual comparison with the output above.
    double ratio = 1.5/(1.5+0.6);
    System.out.println(ratio);
    double weighted = 1+1+3*1.5/(1.5+0.6);
    System.out.println(weighted);
}

Example #3

Source File: AdmmIterationMapper.java From laser with Apache License 2.0

6 votes

/**
 * Builds the mapper context for a split from the context saved by the
 * previous iteration.
 *
 * The previous context is read via {@code readPreviousAdmmMapperContext} and
 * its values are combined with the supplied split data.
 *
 * @param inputSplitData the data of this input split
 * @param splitId        identifier of the split whose previous context is read
 * @return a new context seeded with the previous iteration's values
 * @throws IOException if the previous context cannot be read; the original
 *                     exception is attached as the cause
 */
private AdmmMapperContext assembleMapperContextFromCache(
		Vector[] inputSplitData, String splitId) throws IOException {
	try {
		AdmmMapperContext preContext = readPreviousAdmmMapperContext(
				splitId, previousIntermediateOutputLocationPath, fs, conf);
		return new AdmmMapperContext(splitId, inputSplitData,
				preContext.getUInitial(), preContext.getXInitial(),
				preContext.getZInitial(), preContext.getRho(),
				preContext.getLambdaValue(),
				preContext.getPrimalObjectiveValue(),
				preContext.getRNorm(), preContext.getSNorm());
	} catch (IOException e) {
		LOG.info("Key not found. Split ID: " + splitId + e.getMessage());
		// Chain the original exception so its type and stack trace are not lost.
		throw new IOException("Key not found.  Split ID: " + splitId
				+ e.getMessage(), e);
	}
}

Example #4

Source File: FusedKolmogorovFilterTest.java From pyramid with Apache License 2.0

6 votes

/**
 * Runs the filter on a 10-element feature vector with binary labels and
 * prints the per-class inputs, the empirical CDFs and their maximum distance.
 */
private static void test1(){
    // Positions not listed here keep the DenseVector default value.
    int[] positions = {0, 1, 2, 3, 4, 8};
    double[] values = {0.1, 0.2, 0.15, 0.4, 0.7, 0.9};
    Vector feature = new DenseVector(10);
    for (int p = 0; p < positions.length; p++){
        feature.set(positions[p], values[p]);
    }

    // Label 1 at indices 1, 2, 3 and 9; label 0 elsewhere.
    int[] labels = {0, 1, 1, 1, 0, 0, 0, 0, 0, 1};

    FusedKolmogorovFilter kolmogorovFilter = new FusedKolmogorovFilter();
    kolmogorovFilter.setNumBins(10);

    List<List<Double>> perClassInputs = kolmogorovFilter.generateInputsEachClass(feature, labels, 2);
    System.out.println(perClassInputs);

    List<EmpiricalCDF> cdfs = kolmogorovFilter.generateCDFs(feature, perClassInputs);
    System.out.println(cdfs);
    System.out.println(kolmogorovFilter.maxDistance(cdfs));
}

Example #5

Source File: KMeans.java From pyramid with Apache License 2.0

6 votes

/**
 * Assigns instance {@code i} to the center with the smallest distance and
 * marks the instance as assigned.
 *
 * @param i     index of the instance in the data set
 * @param print whether to print the assignment (and the previous cluster,
 *              if the instance had one and it changed) to standard output
 */
private void assign(int i, boolean print){
    final int oldCluster = assignments[i];
    final Vector instance = dataSet.getRow(i);

    double[] distToCenter = new double[numComponents];
    for (int k = 0; k < numComponents; k++){
        distToCenter[k] = distance(instance, centers[k]);
    }

    final int newCluster = ArgMin.argMin(distToCenter);
    assignments[i] = newCluster;

    if (print){
        // Clusters and instances are reported 1-based.
        String message = "assign instance "+(i+1)+" to cluster "+(newCluster+1);
        boolean moved = assigned[i] && oldCluster != newCluster;
        if (moved){
            message += ", previously in cluster "+(oldCluster+1);
        }
        System.out.println(message);
    }

    assigned[i] = true;
}

Example #6

Source File: KLLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Computes the empirical count for one parameter.
 *
 * The parameter is mapped to a class and a feature. A feature index of -1
 * sums the target marginals of that class over all data points; otherwise
 * each non-zero entry of the feature column is weighted by the target
 * marginal of the corresponding data point.
 *
 * @param parameterIndex index of the parameter
 * @return the accumulated empirical count
 */
private double calEmpiricalCountForFeature(int parameterIndex) {
    final int cls = parameterToClass[parameterIndex];
    final int feature = parameterToFeature[parameterIndex];
    double total = 0.0;

    if (feature != -1){
        // Only non-zero feature values contribute to the sum.
        for (Vector.Element entry: dataSet.getColumn(feature).nonZeroes()){
            total += entry.get()*targetMarginals[entry.index()][cls];
        }
        return total;
    }

    for (int row=0; row<dataSet.getNumDataPoints(); row++) {
        total += targetMarginals[row][cls];
    }
    return total;
}

Example #7

Source File: AdmmMapperContext.java From laser with Apache License 2.0

6 votes

/**
 * Creates a context from explicit values. All arguments are stored as given;
 * arrays are kept by reference, not copied.
 *
 * @param splitId              identifier of the input split
 * @param a                    stored in field {@code a}
 * @param b                    stored in field {@code b}
 * @param uInitial             stored in field {@code uInitial}
 * @param xInitial             stored in field {@code xInitial}
 * @param zInitial             stored in field {@code zInitial}
 * @param rho                  stored in field {@code rho}
 * @param lambdaValue          stored in field {@code lambdaValue}
 * @param primalObjectiveValue stored in field {@code primalObjectiveValue}
 * @param rNorm                stored in field {@code rNorm}
 * @param sNorm                stored in field {@code sNorm}
 */
public AdmmMapperContext(String splitId, Vector[] a, double[] b,
		double[] uInitial, double[] xInitial, double[] zInitial,
		double rho, double lambdaValue, double primalObjectiveValue,
		double rNorm, double sNorm) {
	// split identity and data
	this.splitId = splitId;
	this.a = a;
	this.b = b;

	// initial u, x and z arrays
	this.uInitial = uInitial;
	this.xInitial = xInitial;
	this.zInitial = zInitial;

	// scalar values
	this.rho = rho;
	this.lambdaValue = lambdaValue;
	this.primalObjectiveValue = primalObjectiveValue;
	this.rNorm = rNorm;
	this.sNorm = sNorm;
}

Example #8

Source File: CRFLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Returns the gradient of the log likelihood.
 *
 * The result is cached: it is recomputed only while the cache flag is
 * unset, and the flag is set once the recomputation has finished.
 *
 * @return the (possibly cached) gradient vector
 */
@Override
public Vector getGradient() {
    if (!isGradientCacheValid) {
        final boolean debug = logger.isDebugEnabled();
        if (debug){
            logger.debug("start method getGradient()");
        }
        // O(NdL)
        // The update steps run in this fixed order.
        updateClassScoreMatrix();
        updateAssignmentScoreMatrix();
        updateAssignmentProbMatrix();
        updateCombProbSums();
        updateClassProbMatrix();
        updateGradient();
        this.isGradientCacheValid = true;
        if (logger.isDebugEnabled()){
            logger.debug("finish method getGradient()");
        }
    }
    return this.gradient;
}

Example #9

Source File: Vectors.java From pyramid with Apache License 2.0

6 votes

/**
 * Dot product of two vectors of equal size, dispatching on density.
 *
 * @param vector1 first operand
 * @param vector2 second operand
 * @return the dot product
 * @throws IllegalArgumentException      if the sizes differ
 * @throws UnsupportedOperationException if both vectors are sparse
 */
public static double dot(Vector vector1, Vector vector2){
    if (vector1.size()!=vector2.size()){
        throw new IllegalArgumentException("vector1.size()!=vector2.size()");
    }

    final boolean firstDense = vector1.isDense();
    final boolean secondDense = vector2.isDense();

    if (firstDense){
        return secondDense
                ? dotDenseDense(vector1,vector2)
                : dotDenseSparse(vector1,vector2);
    }
    if (secondDense){
        // The dense operand always goes first.
        return dotDenseSparse(vector2,vector1);
    }
    throw new UnsupportedOperationException("sparse dot sparse is not supported");
}

Example #10

Source File: KLLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Returns the gradient of the log likelihood, recomputing it only when the
 * cached value has been invalidated.
 *
 * @return the (possibly cached) gradient vector
 */
@Override
public Vector getGradient() {
    if (!isGradientCacheValid) {
        if (logger.isDebugEnabled()){
            logger.debug("start method getGradient()");
        }
        recomputeGradient:
        {
            // O(NdL)
            updateClassScoreMatrix();
            updateAssignmentScoreMatrix();
            updateAssignmentProbMatrix();
            updateCombProbSums();
            updateClassProbMatrix();
            updateGradient();
        }
        // Mark the cache valid only after every update step has completed.
        this.isGradientCacheValid = true;
        if (logger.isDebugEnabled()){
            logger.debug("finish method getGradient()");
        }
    }
    return this.gradient;
}

Example #11

Source File: CBMUtilityOptimizer.java From pyramid with Apache License 2.0

6 votes

/**
 * Recomputes the component posteriors for data point {@code n}.
 *
 * For each component, the log posterior memberships of all label
 * combinations are summed with weights {@code targets[n][c]}; the softmax of
 * those sums is written to {@code gammas[n]} and, transposed, to
 * {@code gammasT}.
 *
 * @param n index of the data point
 */
private void updateGamma(int n) {
    final BMDistribution distribution = cbm.computeBM(dataSet.getRow(n));
    final int numCombinations = combinations.size();
    final int numComponents = cbm.numComponents;

    // logPosteriors[c][k]: combination c, component k
    double[][] logPosteriors = new double[numCombinations][];
    for (int c = 0; c < numCombinations; c++){
        logPosteriors[c] = distribution.logPosteriorMembership(combinations.get(c));
    }

    double[] weightedSums = new double[numComponents];
    for (int k = 0; k < numComponents; k++){
        double acc = 0;
        for (int c = 0; c < numCombinations; c++){
            acc += targets[n][c]*logPosteriors[c][k];
        }
        weightedSums[k] = acc;
    }

    double[] posterior = MathUtil.softmax(weightedSums);
    for (int k = 0; k < numComponents; k++) {
        final double p = posterior[k];
        gammas[n][k] = p;
        gammasT[k][n] = p;
    }
}

Example #12

Source File: ElasticNetLogisticTrainer.java From pyramid with Apache License 2.0

5 votes

/**
 * A special backtracking line search for sufficient decrease with an
 * elastic-net penalized model.
 * Reference: An improved glmnet for l1-regularized logistic regression.
 *
 * Starting from step length 1, the step is halved until the loss at
 * {@code start + step * searchDirection} satisfies the sufficient-decrease
 * test. The model's weight vector is overwritten with each trial point, so
 * on exit it holds the accepted point.
 *
 * @param searchDirection direction along which the weights are moved
 * @param gradient        gradient whose dot product with the direction
 *                        enters the decrease test
 */
private void lineSearch(Vector searchDirection, Vector gradient){
    final double shrinkage = 0.5;
    final double c = 1e-4;

    Vector start = logisticRegression.getWeights().getAllWeights();
    final double penalty = penalty();
    final double value = loss(penalty);
    if (logger.isDebugEnabled()){
        logger.debug("start line search");
        logger.debug("initial loss = "+loss());
    }
    final double product = gradient.dot(searchDirection);

    for (double stepLength = 1; ; stepLength *= shrinkage){
        Vector target = start.plus(searchDirection.times(stepLength));
        logisticRegression.getWeights().setWeightVector(target);

        double targetPenalty = penalty();
        double targetValue = loss(targetPenalty);
        double bound = value + c*stepLength*(product + targetPenalty - penalty);
        if (!(targetValue <= bound)){
            // Not enough decrease yet: try again with a shorter step.
            continue;
        }

        if (logger.isDebugEnabled()){
            logger.debug("step size = "+stepLength);
            logger.debug("final loss = "+targetValue);
            logger.debug("line search done");
        }
        return;
    }
}

Example #13

Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0

5 votes

/**
 * Builds a sparse vector of the given preferences, indexed by user ID.
 *
 * @param prefs preferences to convert
 * @return a sequential-access sparse vector holding each preference value at
 *         the position of its user ID
 */
protected Vector sparseItemRatingVector(final PreferenceArray prefs) {
    final int expectedEntries = prefs.length();
    final SequentialAccessSparseVector byUser =
            new SequentialAccessSparseVector(Integer.MAX_VALUE, expectedEntries);
    for (final Preference pref : prefs) {
        // NOTE(review): the user ID is narrowed to int here; IDs outside the
        // int range would be truncated - confirm callers guarantee the range.
        final int userIndex = (int) pref.getUserID();
        byUser.set(userIndex, pref.getValue());
    }
    return byUser;
}

Example #14

Source File: CBMS.java From pyramid with Apache License 2.0

5 votes

/**
 * Sorts the classes by marginal probability (descending) and predicts the
 * top few.
 *
 * If {@code top} exceeds the number of classes, all classes are predicted
 * instead of failing with an index error. A non-positive {@code top} yields
 * an empty prediction.
 *
 * @param vector the instance
 * @param top    maximum number of labels to include
 * @return a label set containing the highest-probability classes
 */
public MultiLabel predictByMarginals(Vector vector, int top){
    double[] probs = predictClassProbs(vector);
    int[] sortedIndices = ArgSort.argSortDescending(probs);
    // Never read past the available classes.
    int numToKeep = Math.min(top, sortedIndices.length);
    MultiLabel prediction = new MultiLabel();
    for (int i=0;i<numToKeep;i++){
        prediction.addLabel(sortedIndices[i]);
    }
    return prediction;
}

Example #15

Source File: MLLogisticRegression.java From pyramid with Apache License 2.0

5 votes

/**
 * Log likelihood of a label set for an instance.
 *
 * Computed as the score of {@code multiLabel} minus the log-sum-exp of the
 * scores of all assignments.
 *
 * @param vector     the instance
 * @param multiLabel the label set to evaluate
 * @return the log likelihood of {@code multiLabel}
 */
double logLikelihood(Vector vector, MultiLabel multiLabel){
    double[] classScores = predictClassScores(vector);
    double[] assignmentScores = calAssignmentScores(classScores);
    // Normaliser over all assignments.
    double logDenominator = MathUtil.logSumExp(assignmentScores);
    double logNumerator = this.calAssignmentScore(multiLabel, classScores);
    return logNumerator-logDenominator;
}

Example #16

Source File: RegressionTreeTest.java From pyramid with Apache License 2.0

5 votes

/**
 * Builds a stump, prints it, and prints its prediction for a 100-element
 * vector whose only explicitly set entry is 0.6 at index 10.
 */
private static void test5(){
    RegressionTree stump = RegressionTree.newStump(10,0.5,-1.2,3);
    System.out.println(stump);

    Vector input = new DenseVector(100);
    input.set(10,0.6);
    double predicted = stump.predict(input);
    System.out.println(predicted);
}

Example #17

Source File: RidgeBinaryLogisticLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Dot product of a data-set column with another vector.
 *
 * @param columnIndex column to use; index 0 is the bias feature, index
 *                    {@code j > 0} refers to data-set column {@code j - 1}
 * @param vector      the other operand
 * @return for the bias column, the sum of the elements of {@code vector};
 *         otherwise the dot product with the data-set column
 */
private double columnDot(int columnIndex, Vector vector){
    if (columnIndex != 0){
        // Shift by one because the bias occupies index 0.
        return dataSet.getColumn(columnIndex-1).dot(vector);
    }
    return vector.zSum();
}

Example #18

Source File: SupervisedEmbeddingLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Copies a flat parameter vector into the updated embedding matrix.
 *
 * The vector is read in row-major order: entry {@code i * numFeatures + j}
 * becomes the value at data point {@code i}, feature {@code j}.
 *
 * @param parameters flat parameter vector
 */
public void setParameters(Vector parameters) {
    final int rows = this.updatedEmbeddingMatrix.getNumDataPoints();
    final int cols = this.updatedEmbeddingMatrix.getNumFeatures();
    // Running position in the flat vector.
    int flat = 0;
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            this.updatedEmbeddingMatrix.setFeatureValue(row, col, parameters.get(flat));
            flat++;
        }
    }
}

Example #19

Source File: LogisticLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Gradient of the penalty term: every weight divided by the prior Gaussian
 * variance, with the entries at the bias positions set to zero.
 *
 * @return the penalty gradient, same size as the weight vector
 */
private Vector penaltyGradient(){
    Vector allWeights = this.logisticRegression.getWeights().getAllWeights();
    Vector scaledWeights = allWeights.divide(priorGaussianVariance);
    Vector gradientOfPenalty = new DenseVector(allWeights.size()).plus(scaledWeights);

    // Entries at the bias positions are zeroed.
    for (int biasPosition: logisticRegression.getWeights().getAllBiasPositions()){
        gradientOfPenalty.set(biasPosition,0);
    }
    return gradientOfPenalty;
}

Example #20

Source File: VectorCardIsoSetCalibrator.java From pyramid with Apache License 2.0

5 votes

/**
 * Calibrates the score stored in the vector using the calibration
 * registered for the vector's cardinality.
 *
 * @param vector holds the uncalibrated score at {@code scoreIndex} and the
 *               cardinality (cast to int) at {@code cardIndex}
 * @return the calibrated score, or 0 if no calibration exists for the
 *         cardinality
 */
public double calibrate(Vector vector){
    final double rawScore = vector.get(scoreIndex);
    final int card = (int)vector.get(cardIndex);
    if (calibrations.containsKey(card)){
        return calibrations.get(card).predict(rawScore);
    }
    // Cardinality with no registered calibration.
    return 0;
}

Example #21

Source File: PluginF1.java From pyramid with Apache License 2.0

5 votes

/**
 * Predicts a label set by drawing {@code numSamples} samples from the model
 * and passing them to a {@link GeneralF1Predictor} limited to
 * {@code maxSize}.
 *
 * An earlier variant (de-duplicating the samples and scoring them with
 * {@code cbm.predictAssignmentProbs}) is not used.
 *
 * @param vector the instance
 * @return the label set chosen by the predictor
 */
private MultiLabel predictBySampling(Vector vector){
    GeneralF1Predictor f1Predictor = new GeneralF1Predictor();
    f1Predictor.setMaxSize(maxSize);
    List<MultiLabel> drawn = cbm.samples(vector, numSamples);
    return f1Predictor.predict(cbm.getNumClasses(), drawn);
}

Example #22

Source File: DataSetUtil.java From pyramid with Apache License 2.0

5 votes

/**
 * Converts a multi-label data set into a binary one for class {@code k}:
 * data points matching class {@code k} get label 1, all others label 0.
 *
 * Density and the missing-value flag are carried over, the non-zero feature
 * values are copied, and the feature list is taken from the source. The new
 * label translator maps 0 to {@code "NOT <label>"} and 1 to
 * {@code "<label>"}.
 *
 * @param dataSet the multi-label source data set
 * @param k       the class treated as positive
 * @return the binary classification data set
 */
public static ClfDataSet toBinary(MultiLabelClfDataSet dataSet, int k){
    final int rows = dataSet.getNumDataPoints();
    final int cols = dataSet.getNumFeatures();
    final boolean hasMissing = dataSet.hasMissingValue();

    final ClfDataSet binary = dataSet.isDense()
            ? new DenseClfDataSet(rows,cols,hasMissing, 2)
            : new SparseClfDataSet(rows,cols,hasMissing, 2);

    for (int row=0;row<rows;row++){
        // Zero entries are skipped.
        for (Vector.Element entry: dataSet.getRow(row).nonZeroes()){
            binary.setFeatureValue(row,entry.index(),entry.get());
        }
        boolean positive = dataSet.getMultiLabels()[row].matchClass(k);
        binary.setLabel(row, positive ? 1 : 0);
    }

    final String positiveName = dataSet.getLabelTranslator().toExtLabel(k);
    List<String> names = new ArrayList<>();
    names.add("NOT "+positiveName);
    names.add(positiveName);
    binary.setLabelTranslator(new LabelTranslator(names));
    binary.setFeatureList(dataSet.getFeatureList());

    return binary;
}

Example #23

Source File: IMLLogisticRegression.java From pyramid with Apache License 2.0

5 votes

/**
 * Predicts the highest-scoring label set, considering only the assignments
 * held by this model.
 *
 * @param vector the instance
 * @return the assignment with the strictly greatest score (the first one on
 *         ties), or {@code null} if no assignment scores above negative
 *         infinity
 */
private MultiLabel predictWithConstraints(Vector vector){
    final double[] classScores = predictClassScores(vector);
    MultiLabel best = null;
    double bestScore = Double.NEGATIVE_INFINITY;
    for (MultiLabel candidate: this.assignments){
        final double candidateScore = this.calAssignmentScore(candidate,classScores);
        if (!(candidateScore > bestScore)){
            continue;
        }
        bestScore = candidateScore;
        best = candidate;
    }
    return best;
}

Example #24

Source File: L2Boost.java From pyramid with Apache License 2.0

5 votes

/**
 * Score of class {@code k} for an instance.
 *
 * @param vector the instance
 * @param k      class index
 * @return 0 for class 0; for any other class, the score of ensemble 0
 */
@Override
public double predictClassScore(Vector vector, int k) {
    if (k != 0){
        return getEnsemble(0).score(vector);
    }
    // Class 0 always scores zero.
    return 0;
}

Example #25

Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0

5 votes

/**
 * Generates split intervals for a 4-element column whose first three entries
 * are NaN, then prints the intervals and their compressed form.
 */
static void test12(){
    RegTreeConfig config = new RegTreeConfig().setNumSplitIntervals(2);

    double[] columnValues = {Double.NaN, Double.NaN, Double.NaN, 3};
    Vector column = new DenseVector(columnValues.length);
    for (int idx = 0; idx < columnValues.length; idx++){
        column.set(idx, columnValues[idx]);
    }

    double[] probs = {1,0.5,1,0.6};
    double[] labels = {1,2,3,4};
    Splitter.GlobalStats stats = new Splitter.GlobalStats(labels,probs);

    List<Interval> generated = IntervalSplitter.generateIntervals(config, column, probs, labels, stats);
    System.out.println(generated);
    System.out.println(IntervalSplitter.compress(generated));
}

Example #26

Source File: LogisticL2DiffFunction.java From laser with Apache License 2.0

5 votes

/**
 * Stores the inputs and derives the dimensions {@code m} and {@code n}.
 *
 * {@code m} is the number of vectors in {@code a}; {@code n} is the size of
 * the first vector minus one, or 0 when {@code a} is empty. A log line is
 * written at the start and at the end of construction.
 *
 * @param a   stored in field {@code a}; its length defines {@code m}
 * @param b   stored in field {@code b}
 * @param rho stored in field {@code rho}
 * @param u   stored in field {@code u}
 * @param z   stored in field {@code z}
 */
public LogisticL2DiffFunction(Vector[] a, double[] b, double rho,
			double[] u, double[] z) {
		LOG.info("Initialize LogisticL2DiffFunction");
		this.a = a;
		this.b = b;
		this.rho = rho;

		// Dimensions: m vectors, each of size n + 1.
		this.m = a.length;
		this.n = (this.m > 0) ? this.a[0].size() - 1 : 0;

		// (A disabled diagnostic that logged the byte size of the non-zero
		// entries used to live here.)

		this.u = u;
		this.z = z;
		LOG.info("Initialize LogisticL2DiffFunction Finish");
	}

Example #27

Source File: RegressionTree.java From pyramid with Apache License 2.0

5 votes

/**
 * Prediction as a probability-weighted sum over all leaves: each leaf
 * value is multiplied by the probability computed for that leaf.
 *
 * @param vector the instance
 * @return the sum over leaves of probability times leaf value
 */
private double predictWithMissingValue(Vector vector){
    // Per-node scratch arrays handed to probability(), used as a simple cache.
    final int nodeCount = this.numNodes;
    final boolean[] calculated = new boolean[nodeCount];
    final double[] nodeProbs = new double[nodeCount];

    double weightedSum = 0;
    for (Node leaf: this.leaves){
        weightedSum += probability(vector,leaf, calculated, nodeProbs)*leaf.getValue();
    }
    return weightedSum;
}

Example #28

Source File: FusedKolmogorovFilter.java From pyramid with Apache License 2.0

5 votes

/**
 * Builds one empirical CDF per class. Every CDF uses the same range - the
 * minimum and maximum of the whole {@code vector} - and {@code numBins} bins.
 *
 * @param vector          vector whose min and max define the range
 * @param inputsEachClass the input values of each class
 * @return one {@link EmpiricalCDF} per entry of {@code inputsEachClass}, in
 *         the same order
 */
public List<EmpiricalCDF> generateCDFs(Vector vector, List<List<Double>> inputsEachClass){
    final double globalMin = vector.minValue();
    final double globalMax = vector.maxValue();
    return inputsEachClass.stream()
            .map(classInputs -> new EmpiricalCDF(classInputs,globalMin,globalMax,numBins))
            .collect(Collectors.toList());
}

Example #29

Source File: DataSetUtil.java From pyramid with Apache License 2.0

5 votes

/**
 * Creates a data set that keeps only the selected feature columns.
 *
 * Density, the missing-value flag, the number of classes, the labels, and
 * the label and id translators are carried over. Column
 * {@code columnsToKeep.get(j)} of the source becomes column {@code j} of the
 * result.
 *
 * @param dataSet       the source data set
 * @param columnsToKeep source column indices to keep, in the desired order
 * @return the trimmed data set
 */
public static ClfDataSet sampleFeatures(ClfDataSet dataSet, List<Integer> columnsToKeep){
    final int numClasses = dataSet.getNumClasses();
    final boolean hasMissing = dataSet.hasMissingValue();
    // keep density
    final ClfDataSet trimmed = dataSet.isDense()
            ? new DenseClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, numClasses)
            : new SparseClfDataSet(dataSet.getNumDataPoints(),columnsToKeep.size(), hasMissing, numClasses);

    for (int newColumn=0;newColumn<trimmed.getNumFeatures();newColumn++){
        int sourceColumn = columnsToKeep.get(newColumn);
        for (Vector.Element entry: dataSet.getColumn(sourceColumn).nonZeroes()){
            trimmed.setFeatureValue(entry.index(),newColumn,entry.get());
        }
    }

    // copy labels
    final int[] sourceLabels = dataSet.getLabels();
    for (int row=0;row<trimmed.getNumDataPoints();row++){
        trimmed.setLabel(row,sourceLabels[row]);
    }

    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());

    List<Feature> sourceFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(sourceFeatures::get)
            .collect(Collectors.toList());
    // NOTE(review): these Feature objects come from the source data set's
    // list, so setIndex appears to renumber the source's features as well -
    // confirm whether getAll() returns copies.
    int position = 0;
    for (Feature kept: keptFeatures){
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}

Example #30

Source File: CMLCRF.java From pyramid with Apache License 2.0

5 votes

/**
 * Scores every class for an instance.
 *
 * @param vector the instance
 * @return an array of length {@code numClasses} whose entry {@code k} is
 *         {@code predictClassScore(vector, k)}
 */
double[] predictClassScores(Vector vector){
    final double[] perClass = new double[numClasses];
    int cls = 0;
    while (cls < numClasses){
        perClass[cls] = predictClassScore(vector, cls);
        cls++;
    }
    return perClass;
}