Java Code Examples for org.apache.mahout.math.Vector#set()

The following examples show how to use org.apache.mahout.math.Vector#set(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: KMeans.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Recomputes the centroid of cluster {@code k} as the mean of every data point whose
 * assignment equals {@code k}, stores it in {@code centers[k]} and prints a progress line.
 *
 * NOTE(review): if no data point is assigned to cluster {@code k}, the member count stays 0
 * and the final division divides by zero -- confirm callers never leave a cluster empty.
 *
 * @param k index of the cluster whose centroid is refreshed
 */
private void updateCenters(int k){
    Vector sum = new DenseVector(dataSet.getNumFeatures());
    double memberCount = 0;
    for (int row = 0; row < dataSet.getNumDataPoints(); row++) {
        if (assignments[row] != k) {
            continue;
        }
        // accumulate this member feature by feature
        Vector point = dataSet.getRow(row);
        for (int col = 0; col < point.size(); col++) {
            sum.set(col, sum.get(col) + point.get(col));
        }
        memberCount += 1;
    }
    centers[k] = sum.divide(memberCount);
    System.out.println("update the centroid of cluster "+(k+1)+" based on "+(int)memberCount+" instances in the cluster");
}
 
Example 2
Source File: FusedKolmogorovFilterTest.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Smoke test: builds a 10-element feature vector and label array by hand, then prints the
 * per-class inputs, the empirical CDFs and the maximum CDF distance computed by the filter.
 */
private static void test1(){
    // positions 5, 6, 7 and 9 of the vector are deliberately never set
    int[] positions = {0, 1, 2, 3, 4, 8};
    double[] values = {0.1, 0.2, 0.15, 0.4, 0.7, 0.9};
    Vector vector = new DenseVector(10);
    for (int p = 0; p < positions.length; p++) {
        vector.set(positions[p], values[p]);
    }

    int[] labels = {0, 1, 1, 1, 0, 0, 0, 0, 0, 1};

    FusedKolmogorovFilter filter = new FusedKolmogorovFilter();
    filter.setNumBins(10);

    List<List<Double>> perClassInputs = filter.generateInputsEachClass(vector, labels, 2);
    System.out.println(perClassInputs);

    List<EmpiricalCDF> cdfs = filter.generateCDFs(vector, perClassInputs);
    System.out.println(cdfs);
    System.out.println(filter.maxDistance(cdfs));
}
 
Example 3
Source File: NoiseOptimizerLR.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the transform probability of one (data point, label combination) pair and stores
 * it in {@code transformProbs[dataPoint][comIndex]}.
 *
 * The probability is the product, over all classes, of the per-class transform model's
 * probability for target 1 when the data point's labels contain that class and for target 0
 * otherwise. Every per-class model receives the same input: the candidate combination's
 * vector with 0.5 subtracted from each entry.
 *
 * @param dataPoint index of the data point whose observed labels are used
 * @param comIndex  index of the candidate label combination in {@code combinations}
 */
private void updateTransformProb(int dataPoint, int comIndex){
    int numClasses = dataSet.getNumClasses();
    MultiLabel labels = dataSet.getMultiLabels()[dataPoint];
    MultiLabel candidate = combinations.get(comIndex);

    Vector toMinus = new DenseVector(numClasses);
    for (int i = 0; i < numClasses; i++) {
        toMinus.set(i, 0.5);
    }

    // The shifted candidate vector does not depend on the class index, so build it once
    // instead of once per loop iteration.
    Vector shiftedCandidate = candidate.toVector(numClasses).minus(toMinus);

    double prod = 1;
    for (int l = 0; l < numClasses; l++) {
        int target = labels.matchClass(l) ? 1 : 0;
        prod *= this.lrTransforms.get(l).predictClassProb(shiftedCandidate, target);
    }
    transformProbs[dataPoint][comIndex] = prod;
}
 
Example 4
Source File: IntervalSplitterTest.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Smoke test: generates split intervals for a 4-element feature column whose first value is
 * NaN, then prints the intervals and their compressed form.
 */
static void test10(){
    RegTreeConfig config = new RegTreeConfig().setNumSplitIntervals(2);

    // feature column: NaN, 1, 2, 3
    Vector featureValues = new DenseVector(4);
    featureValues.set(0, Double.NaN);
    for (int i = 1; i <= 3; i++) {
        featureValues.set(i, i);
    }

    double[] probs = {1, 0.5, 1, 0.6};
    double[] labels = {1, 2, 3, 4};
    Splitter.GlobalStats stats = new Splitter.GlobalStats(labels, probs);

    List<Interval> intervals =
            IntervalSplitter.generateIntervals(config, featureValues, probs, labels, stats);
    System.out.println(intervals);
    System.out.println(IntervalSplitter.compress(intervals));
}
 
Example 5
Source File: Step1.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Collects all (item, preference) pairs of one user into a single sparse vector, using the
 * parsed item id as the index and the parsed preference as the value, and writes it out
 * keyed by the user id.
 *
 * @param userID    key of the reduce group
 * @param itemPrefs item/preference pairs belonging to that key
 * @param context   output sink the resulting vector is written to
 */
public void reduce(VarLongWritable userID, Iterable<LongAndFloat> itemPrefs, Context context)
        throws IOException, InterruptedException {
    Vector preferences = new RandomAccessSparseVector(Integer.MAX_VALUE, 10);
    for (LongAndFloat pref : itemPrefs) {
        // both halves are parsed from their string representation
        int itemIndex = Integer.parseInt(pref.getFirst().toString());
        float rating = Float.parseFloat(pref.getSecond().toString());
        preferences.set(itemIndex, rating);
    }
    context.write(userID, new VectorWritable(preferences));
}
 
Example 6
Source File: BM.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Draws one vector from the mixture distribution: a cluster is chosen according to
 * {@code mixtureCoefficients}, then each coordinate is drawn from that cluster's
 * distribution for the corresponding dimension.
 *
 * @return a newly allocated dense vector of length {@code dimension}
 */
public Vector sample(){
    Vector draw = new DenseVector(dimension);

    // step 1: choose the mixture component
    int[] componentIds = IntStream.range(0, numClusters).toArray();
    EnumeratedIntegerDistribution componentPicker =
            new EnumeratedIntegerDistribution(componentIds, mixtureCoefficients);
    int chosen = componentPicker.sample();

    // step 2: draw every coordinate from the chosen component
    for (int d = 0; d < dimension; d++) {
        draw.set(d, distributions[chosen][d].sample());
    }
    return draw;
}
 
Example 7
Source File: Calibration.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a text file and turns every line into a dense vector of doubles.
 *
 * For each line, the segment following the first "(" (up to the next "(" if there is one)
 * is taken, every ")" is removed, and the remainder is split on commas; each trimmed token
 * is parsed as a double.
 *
 * @param file path of the file to read
 * @return one vector per line, in file order
 * @throws Exception if the file cannot be read or a line does not have the expected shape
 */
private static Vector[] loadFeatures(String file) throws Exception{
    List<String> lines = FileUtils.readLines(new File(file));
    int numLines = lines.size();
    Vector[] scores = new Vector[numLines];
    for (int row = 0; row < numLines; row++) {
        String afterParen = lines.get(row).split(Pattern.quote("("))[1];
        String[] tokens = afterParen.replace(")", "").split(",");

        Vector parsed = new DenseVector(tokens.length);
        for (int col = 0; col < tokens.length; col++) {
            parsed.set(col, Double.parseDouble(tokens[col].trim()));
        }
        scores[row] = parsed;
    }
    return scores;
}
 
Example 8
Source File: PlattScaling.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the calibrated probability for {@code classIndex}: the raw class score from
 * {@code scoreEstimator} is wrapped in a one-element vector and passed to the logistic
 * regression stored for that class, whose probability for class 1 is returned.
 *
 * @param vector     input feature vector
 * @param classIndex class whose probability is requested
 * @return probability of class 1 under the per-class logistic regression
 */
@Override
public double predictClassProb(Vector vector, int classIndex) {
    double rawScore = scoreEstimator.predictClassScore(vector, classIndex);

    // the calibrator takes a single feature: the raw score
    Vector calibratorInput = new DenseVector(1);
    calibratorInput.set(0, rawScore);

    return logisticRegressions.get(classIndex).predictClassProb(calibratorInput, 1);
}
 
Example 9
Source File: MultiLabel.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a sparse indicator vector: every index whose bit is set in {@code labels}
 * receives the value 1; all other positions are left untouched.
 *
 * @param length size of the returned vector
 * @return a new random-access sparse vector of the given length
 */
public Vector toVectorRandomSparse(int length){
    Vector indicator = new RandomAccessSparseVector(length);
    int bit = labels.nextSetBit(0);
    while (bit >= 0) {
        indicator.set(bit, 1);
        bit = labels.nextSetBit(bit + 1);
    }
    return indicator;
}
 
Example 10
Source File: LogisticLoss.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the gradient contribution of the penalty term: all weights divided by
 * {@code priorGaussianVariance}, with the entries at the bias positions reset to 0.
 *
 * @return a new dense vector with one entry per weight
 */
private Vector penaltyGradient(){
    Vector weights = this.logisticRegression.getWeights().getAllWeights();

    // start from an all-new dense vector so the stored weights are never modified
    Vector zeros = new DenseVector(weights.size());
    Vector gradient = zeros.plus(weights.divide(priorGaussianVariance));

    // bias positions are zeroed out of the penalty gradient
    for (int biasPosition : logisticRegression.getWeights().getAllBiasPositions()) {
        gradient.set(biasPosition, 0);
    }
    return gradient;
}
 
Example 11
Source File: RidgeBinaryLogisticLoss.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the gradient at {@code w} and writes it into {@code g}.
 *
 * For every row, {@code scores} is overwritten in place (first with the logistic function
 * of {@code y * score}, finally with {@code regularization * (sigmoid - 1) * y}) and
 * {@code diagonals} receives {@code sigmoid * (1 - sigmoid)}. {@code XTv(scores, g)} then
 * fills {@code g}, and {@code w} is added to it element-wise.
 *
 * @param w current weight vector (read only here)
 * @param g output vector that receives the gradient
 */
public void grad(Vector w, Vector g) {
    int[] y = labels;
    for (int row = 0; row < numRows; row++) {
        double sigmoid = 1 / (1 + Math.exp(-y[row] * scores.get(row)));
        scores.set(row, sigmoid);
        diagonals.set(row, sigmoid * (1 - sigmoid));
        // from here on scores no longer holds the raw scores for this row
        scores.set(row, regularization.get(row) * (sigmoid - 1) * y[row]);
    }
    XTv(scores, g);

    // add the weight vector itself to the data term
    for (int col = 0; col < g.size(); col++) {
        g.set(col, w.get(col) + g.get(col));
    }
}
 
Example 12
Source File: BasicFeatureEncoder.java    From ml-models with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a feature map as a dense vector of length {@code vectorSize}.
 *
 * When {@code hasIntercept} is set, position 0 is set to 1. Every entry of
 * {@code features} is written at the position that {@code offsets} holds for its key.
 *
 * Values may be any {@link Number} (Double, Integer, Long, Float, ...). Casting the value
 * straight to {@code double} only works for {@code Double} and throws a
 * ClassCastException for every other boxed numeric type.
 *
 * @param features feature name to numeric value
 * @return the encoded vector
 * @throws ClassCastException   if a value is not a {@link Number}
 * @throws NullPointerException if a value is null or a key has no entry in {@code offsets}
 */
Vector getVector(Map<String, Object> features) {
    Vector v = new DenseVector(vectorSize);
    if (hasIntercept) {
        v.set(0, 1);
    }
    for (Map.Entry<String, Object> feature : features.entrySet()) {
        // go through Number so that Integer/Long/Float values are accepted as well as Double
        double value = ((Number) feature.getValue()).doubleValue();
        v.set(offsets.get(feature.getKey()), value);
    }
    return v;
}
 
Example 13
Source File: PriorFeatureExtractor.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Produces a one-element feature vector holding the prior stored for the candidate's
 * multi-label, or 0.0 when {@code priors} has no entry for it.
 *
 * @param prediction candidate whose {@code multiLabel} is looked up
 * @return a new dense vector of length 1
 */
@Override
public Vector extractFeatures(PredictionCandidate prediction) {
    Vector features = new DenseVector(1);
    features.set(0, priors.getOrDefault(prediction.multiLabel, 0.0));
    return features;
}
 
Example 14
Source File: SupervisedEmbeddingLoss.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the gradient with respect to all rows of {@code updatedEmbeddingMatrix} and
 * returns it flattened row by row (row {@code i} occupies positions
 * {@code i * numFeatures .. (i + 1) * numFeatures - 1}).
 *
 * Each row's gradient is the sum of
 * <ul>
 *   <li>{@code 2 * alpha * (q_i - q_i_orig)}, and</li>
 *   <li>for every row {@code j}: {@code 4 * beta * (p_sq - d_sq)} times the vector whose
 *       k-th entry is {@code projMatrix.getRow(k).dot(p_i - p_j)}, where {@code p_i} and
 *       {@code p_j} are the 2-D projections of rows i and j, {@code p_sq} is their squared
 *       distance and {@code d_sq} is the square of {@code distMatrix[i][j]}.</li>
 * </ul>
 *
 * @return the flattened gradient, of length numDataPoints * numFeatures
 */
public Vector getGradient() {
    int numData = this.updatedEmbeddingMatrix.getNumDataPoints();
    int numFeatures = this.updatedEmbeddingMatrix.getNumFeatures();
    Vector finalGradient = new DenseVector(numData * numFeatures);

    // None of these change inside the loops, so fetch them once instead of numData^2 times.
    Vector projColX = this.projMatrix.getColumn(0);
    Vector projColY = this.projMatrix.getColumn(1);
    int numProjRows = this.projMatrix.getNumDataPoints();

    for (int i = 0; i < numData; i++) {
        Vector q_i = this.updatedEmbeddingMatrix.getRow(i);
        Vector q_i_orig = this.embeddingMatrix.getRow(i);
        Vector gradient = new DenseVector(numFeatures);
        gradient = gradient.plus(q_i.minus(q_i_orig).times(2.0 * this.alpha));

        // The projection of row i and its distance row are the same for every j.
        double pi_x = projColX.dot(q_i);
        double pi_y = projColY.dot(q_i);
        Vector distRow = this.distMatrix.getRow(i);

        for (int j = 0; j < numData; j++) {
            Vector q_j = this.updatedEmbeddingMatrix.getRow(j);
            double diffX = pi_x - projColX.dot(q_j);
            double diffY = pi_y - projColY.dot(q_j);
            double p_sq = diffX * diffX + diffY * diffY;
            double dist = distRow.get(j);
            double d_sq = dist * dist;

            Vector p_dist_vec = new DenseVector(2);
            p_dist_vec.set(0, diffX);
            p_dist_vec.set(1, diffY);

            Vector tmp = new DenseVector(numProjRows);
            for (int k = 0; k < numProjRows; k++) {
                tmp.set(k, this.projMatrix.getRow(k).dot(p_dist_vec));
            }
            gradient = gradient.plus(tmp.times(4.0 * this.beta * (p_sq - d_sq)));
        }

        // copy this row's gradient into its slot of the flattened result
        for (int j = 0; j < numFeatures; j++) {
            finalGradient.set(i * numFeatures + j, gradient.get(j));
        }
    }
    return finalGradient;
}
 
Example 15
Source File: SerializableVectorTest.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trip smoke test: serializes a small sequential-access sparse vector to
 * {@code TMP/v.ser}, reads it back, and prints the loaded vector's size, class name and
 * contents.
 */
private static void test1() throws Exception{
    Vector original = new SequentialAccessSparseVector(10);
    original.set(0, 2);
    original.set(5, 8);

    SerializableVector wrapper = new SerializableVector(original);
    Serialization.serialize(wrapper, new File(TMP, "v.ser"));

    SerializableVector restored =
            (SerializableVector) Serialization.deserialize(new File(TMP, "v.ser"));
    Vector loaded = restored.getVector();

    System.out.println(loaded.size());
    System.out.println(loaded.getClass().getName());
    System.out.println(loaded);
}
 
Example 16
Source File: LogisticRegressionTest.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Smoke test: builds a logistic regression from (2, 10, prior) and prints its class
 * probabilities for a 10-element vector of {@code Math.random()} values.
 */
private static void test4() {
    double[] prior = {0.3, 0.7};
    LogisticRegression model = new LogisticRegression(2, 10, prior);

    Vector randomFeatures = new DenseVector(10);
    int d = 0;
    while (d < 10) {
        randomFeatures.set(d, Math.random());
        d++;
    }

    System.out.println(Arrays.toString(model.predictClassProbs(randomFeatures)));
}
 
Example 17
Source File: TrustRegionNewtonOptimizer.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Multiplies every element of {@code vector} by {@code constant}, in place.
 * Nothing is touched when the constant is exactly 1.0.
 *
 * @param constant scaling factor
 * @param vector   vector to scale; modified in place
 */
private static void scale(double constant, Vector vector) {
    if (constant != 1.0) {
        for (int idx = 0; idx < vector.size(); idx++) {
            double scaled = vector.get(idx) * constant;
            vector.set(idx, scaled);
        }
    }
}
 
Example 18
Source File: MultiLabelSynthesizer.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Synthesizes a multi-label data set with random features, labels derived from random
 * per-class weight vectors, and label noise added by flipping.
 *
 * Steps, in order:
 * <ol>
 *   <li>draw one weight vector per class, each entry from {@code Sampling.doubleUniform(-1, 1)};</li>
 *   <li>draw every feature value from {@code Sampling.doubleUniform(-1, 1)};</li>
 *   <li>give data point i label k when the dot product of weight k and row i is &gt;= 0;</li>
 *   <li>for every data point, toggle one class drawn from
 *       {@code Sampling.intUniform(0, numClass - 1)}; when that class is 0, additionally
 *       toggle a second class drawn from {@code Sampling.intUniform(1, numClass - 1)}.</li>
 * </ol>
 *
 * @param numData    number of data points
 * @param numFeature number of features
 * @param numClass   number of classes
 * @return the generated data set
 */
public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass){
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // 1. random weight vector per class
    Vector[] weights = new Vector[numClass];
    for (int c = 0; c < numClass; c++) {
        Vector classWeights = new DenseVector(numFeature);
        for (int f = 0; f < numFeature; f++) {
            classWeights.set(f, Sampling.doubleUniform(-1, 1));
        }
        weights[c] = classWeights;
    }

    // 2. random feature values
    for (int row = 0; row < numData; row++) {
        for (int f = 0; f < numFeature; f++) {
            dataSet.setFeatureValue(row, f, Sampling.doubleUniform(-1, 1));
        }
    }

    // 3. labels from the sign of the weight/feature dot product
    for (int row = 0; row < numData; row++) {
        for (int c = 0; c < numClass; c++) {
            double score = weights[c].dot(dataSet.getRow(row));
            if (score >= 0) {
                dataSet.addLabel(row, c);
            }
        }
    }

    // 4. flip one label per data point, and a second one whenever class 0 was picked
    for (int row = 0; row < numData; row++) {
        int picked = Sampling.intUniform(0, numClass - 1);
        MultiLabel label = dataSet.getMultiLabels()[row];
        if (!label.matchClass(picked)) {
            label.addLabel(picked);
        } else {
            label.removeLabel(picked);
        }

        if (picked == 0) {
            int second = Sampling.intUniform(1, numClass - 1);
            if (!label.matchClass(second)) {
                label.addLabel(second);
            } else {
                label.removeLabel(second);
            }
        }
    }

    return dataSet;
}
 
Example 19
Source File: PlattScaling.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Maps an uncalibrated score to a probability by feeding it, as a one-element vector, to
 * {@code logisticRegression} and returning its probability for class 1.
 *
 * @param uncalibrated raw score
 * @return probability of class 1 for that score
 */
public double transform(double uncalibrated){
    Vector singleFeature = new DenseVector(1);
    singleFeature.set(0, uncalibrated);
    double calibrated = logisticRegression.predictClassProb(singleFeature, 1);
    return calibrated;
}
 
Example 20
Source File: RegressionTreeTest.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Smoke test for a hand-built depth-2 regression tree with three internal nodes and four
 * leaves (values 1 to 4). Prints the probability reported for every node, followed by the
 * prediction, for a vector whose second and third features are NaN.
 */
private static void test2(){
    // internal nodes: root splits on feature 0, its children on features 1 and 2
    Node root = new Node();
    root.setFeatureIndex(0);
    root.setThreshold(0.0);
    root.setLeftProb(0.3);
    root.setRightProb(0.7);

    Node left = new Node();
    left.setFeatureIndex(1);
    left.setThreshold(0.1);
    left.setLeftProb(0.8);
    left.setRightProb(0.2);

    Node right = new Node();
    right.setFeatureIndex(2);
    right.setThreshold(0.2);
    right.setLeftProb(0.1);
    right.setRightProb(0.9);

    // four leaves carrying the values 1, 2, 3, 4
    Node[] leafNodes = new Node[4];
    for (int i = 0; i < leafNodes.length; i++) {
        Node leaf = new Node();
        leaf.setLeaf(true);
        leaf.setValue(i + 1);
        leafNodes[i] = leaf;
    }

    // wire the tree together
    root.setLeftChild(left);
    root.setRightChild(right);
    left.setLeftChild(leafNodes[0]);
    left.setRightChild(leafNodes[1]);
    right.setLeftChild(leafNodes[2]);
    right.setRightChild(leafNodes[3]);

    RegressionTree tree = new RegressionTree();
    tree.root = root;
    for (Node leaf : leafNodes) {
        tree.leaves.add(leaf);
    }

    // only feature 0 is present; features 1 and 2 are NaN
    Vector input = new DenseVector(3);
    input.set(0, -1);
    input.set(1, Double.NaN);
    input.set(2, Double.NaN);

    // same print order as the node creation order: internal nodes first, then leaves
    Node[] inPrintOrder = {root, left, right, leafNodes[0], leafNodes[1], leafNodes[2], leafNodes[3]};
    for (Node node : inPrintOrder) {
        System.out.println(tree.probability(input, node));
    }
    System.out.println(tree.predict(input));
}