org.apache.mahout.math.VectorWritable Java Examples

The following examples show how to use org.apache.mahout.math.VectorWritable. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: 1000021_CDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Computes the separation term from the representative points.
 *
 * <p>The smallest distance between a representative point of one cluster and a representative
 * point of a different cluster is divided by {@code 1.0 + interClusterDensity()}.
 *
 * @return the minimum cross-cluster distance divided by {@code 1.0 + interClusterDensity()};
 *         the numerator remains {@code Double.MAX_VALUE} when no cross-cluster pair exists
 */
public double separation() {
  double closest = Double.MAX_VALUE;
  for (Map.Entry<Integer, List<VectorWritable>> outer : representativePoints.entrySet()) {
    Integer outerId = outer.getKey();
    List<VectorWritable> outerPoints = outer.getValue();
    for (Map.Entry<Integer, List<VectorWritable>> inner : representativePoints.entrySet()) {
      // Only pairs of points that belong to two distinct clusters are compared.
      if (!outerId.equals(inner.getKey())) {
        List<VectorWritable> innerPoints = inner.getValue();
        for (VectorWritable from : outerPoints) {
          for (VectorWritable to : innerPoints) {
            double candidate = measure.distance(from.get(), to.get());
            if (candidate < closest) {
              closest = candidate;
            }
          }
        }
      }
    }
  }
  return closest / (1.0 + interClusterDensity());
}
 
Example #2
Source File: LaserMessageConsumer.java    From laser with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the given collaborators and opens two sequence-file writers under {@code output}:
 * one in the online folder for the current online version and one in the offline folder for
 * the current offline version.
 *
 * @param collection the collection name kept by this consumer
 * @param output the base output path under which both folders are created
 * @param fs the file system used to create the writers
 * @param conf the configuration used to create the writers
 * @throws IOException if either writer cannot be created
 */
public LaserMessageConsumer(String collection, Path output, FileSystem fs,
		Configuration conf) throws IOException {
	this.collection = collection;
	this.output = output;
	this.fs = fs;
	this.conf = conf;

	// Online features: Text keys, OnlineVectorWritable values.
	String onlineChild = ONLINE_FOLDER + "/" + Long.toString(onlineVersion);
	Path onlineTarget = new Path(output, onlineChild);
	onlineWriter = SequenceFile.createWriter(fs, conf, onlineTarget,
			Text.class, OnlineVectorWritable.class);

	// Offline features: Text keys, VectorWritable values.
	String offlineChild = OFFLINE_FOLDER + "/" + Long.toString(offlineVersion);
	Path offlineTarget = new Path(output, offlineChild);
	offlineWriter = SequenceFile.createWriter(fs, conf, offlineTarget,
			Text.class, VectorWritable.class);
}
 
Example #3
Source File: 1000021_TestCDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Runs {@code DirichletDriver.runJob} over the "testdata" directory, then runs
 * {@code CDbwDriver.runJob} over the "clusters-5" and "clusteredPoints" directories it left in
 * "output", and finally prints the representative points written for each iteration.
 */
@Test
public void testDirichlet() throws Exception {
  VectorWritable prototype = new VectorWritable(new DenseVector(2));
  ModelDistribution<VectorWritable> distribution = new GaussianClusterDistribution(prototype);
  Path clusteringInput = getTestTempDirPath("testdata");
  Path clusteringOutput = getTestTempDirPath("output");
  DirichletDriver.runJob(
      clusteringInput, clusteringOutput, distribution, 15, 5, 1.0, 1, true, true, 0, true);

  int numIterations = 2;
  Path output = getTestTempDirPath("output");
  Path clustersIn = new Path(output, "clusters-5");
  Path clusteredPointsIn = new Path(output, "clusteredPoints");
  CDbwDriver.runJob(
      clustersIn, clusteredPointsIn, output, new EuclideanDistanceMeasure(), numIterations, 1);

  checkRefPoints(numIterations);
}
 
Example #4
Source File: 1000021_TestCDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Builds the synthetic fixture: four canopies (ids 1, 3, 5, 7) whose centers sit at
 * {@code (+-dC, +-dC)}, and for each of them five representative points, namely a copy of
 * the center followed by the center shifted by {@code (+-dP, +-dP)}.
 *
 * @param dC the absolute coordinate offset of each cluster center from the origin
 * @param dP the absolute coordinate offset of each shifted representative point from its center
 * @param measure the DistanceMeasure handed to every canopy
 */
private void initData(double dC, double dP, DistanceMeasure measure) {
  int[] ids = { 1, 3, 5, 7 };
  double[][] centers = { { -dC, -dC }, { -dC, dC }, { dC, dC }, { dC, -dC } };
  clusters = new HashMap<Integer, Cluster>();
  for (int k = 0; k < ids.length; k++) {
    clusters.put(ids[k], new Canopy(new DenseVector(centers[k]), ids[k], measure));
  }

  double[][] shifts = { { dP, dP }, { dP, -dP }, { -dP, -dP }, { -dP, dP } };
  representativePoints = new HashMap<Integer, List<VectorWritable>>();
  for (Cluster cluster : clusters.values()) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    representativePoints.put(cluster.getId(), points);
    // The center itself comes first, then one point per shift.
    points.add(new VectorWritable(cluster.getCenter().clone()));
    for (double[] shift : shifts) {
      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(shift))));
    }
  }
}
 
Example #5
Source File: 1000021_TestCDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Prints every (cluster id, point) record found in the "representativePoints-i" directories
 * under the test "output" directory, for i from 0 to {@code numIterations} inclusive.
 * Files whose name starts with a dot are skipped.
 *
 * @param numIterations the highest iteration index to read
 * @throws IOException if listing or reading a file fails
 */
private void checkRefPoints(int numIterations) throws IOException {
  for (int iteration = 0; iteration <= numIterations; iteration++) {
    Path pointsDir = new Path(getTestTempDirPath("output"), "representativePoints-" + iteration);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus status : fs.listStatus(pointsDir)) {
      Path part = status.getPath();
      if (part.getName().startsWith(".")) {
        continue;
      }
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, part, conf);
      try {
        // The same key and value instances are refilled by each call to next().
        Writable clusterId = new IntWritable(0);
        VectorWritable point = new VectorWritable();
        while (reader.next(clusterId, point)) {
          System.out.println("\tC-" + clusterId + ": " + AbstractCluster.formatVector(point.get(), null));
        }
      } finally {
        reader.close();
      }
    }
  }
}
 
Example #6
Source File: Synthetic2DClusteringPrep.java    From hiped2 with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a text file of whitespace-separated 2D points into a block-compressed sequence
 * file of {@code NullWritable} keys and {@code VectorWritable} values.
 *
 * <p>Only the first two tokens of each line are used; each becomes one coordinate of a
 * two-element dense vector.
 *
 * @param inputFile the text file to read, one point per line
 * @param outputPath the sequence file to create
 * @throws IOException if reading the input or writing the output fails
 */
public static void write(File inputFile, Path outputPath)
    throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  SequenceFile.Writer out = SequenceFile.createWriter(
      fs, conf, outputPath,
      NullWritable.class, VectorWritable.class,
      SequenceFile.CompressionType.BLOCK, new DefaultCodec());
  try {
    for (String line : FileUtils.readLines(inputFile)) {
      String[] tokens = StringUtils.split(line);
      double x = Double.parseDouble(tokens[0]);
      double y = Double.parseDouble(tokens[1]);
      out.append(NullWritable.get(), new VectorWritable(new DenseVector(new double[] { x, y })));
    }
  } finally {
    out.close();
  }
}
 
Example #7
Source File: 1000021_TestCDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * End-to-end check: {@code DirichletDriver.runJob} is run on the "testdata" directory, its
 * "clusters-5" and "clusteredPoints" outputs are fed to {@code CDbwDriver.runJob}, and the
 * resulting representative points are printed per iteration.
 */
@Test
public void testDirichlet() throws Exception {
  DenseVector emptyPrototype = new DenseVector(2);
  ModelDistribution<VectorWritable> gaussian =
      new GaussianClusterDistribution(new VectorWritable(emptyPrototype));
  Path testData = getTestTempDirPath("testdata");
  Path dirichletOutput = getTestTempDirPath("output");
  DirichletDriver.runJob(testData, dirichletOutput, gaussian,
                         15, 5, 1.0, 1,
                         true, true, 0, true);

  int numIterations = 2;
  Path output = getTestTempDirPath("output");
  Path clusters = new Path(output, "clusters-5");
  Path clusteredPoints = new Path(output, "clusteredPoints");
  EuclideanDistanceMeasure euclidean = new EuclideanDistanceMeasure();
  CDbwDriver.runJob(clusters, clusteredPoints, output, euclidean, numIterations, 1);

  checkRefPoints(numIterations);
}
 
Example #8
Source File: 1000021_TestCDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * Initializes synthetic data: four canopies with ids 1, 3, 5 and 7 centered at
 * {@code (-dC,-dC)}, {@code (-dC,dC)}, {@code (dC,dC)} and {@code (dC,-dC)}. Each cluster
 * gets five representative points: a clone of its center, then the center plus
 * {@code (dP,dP)}, {@code (dP,-dP)}, {@code (-dP,-dP)} and {@code (-dP,dP)}.
 *
 * @param dC a double cluster center offset
 * @param dP a double representative point offset
 * @param measure the DistanceMeasure passed to each canopy
 */
private void initData(double dC, double dP, DistanceMeasure measure) {
  clusters = new HashMap<Integer, Cluster>();
  clusters.put(1, new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
  clusters.put(3, new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
  clusters.put(5, new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
  clusters.put(7, new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));

  // Offsets applied to each center, in the order the points are appended.
  double[][] cornerOffsets = {
    { dP, dP },
    { dP, -dP },
    { -dP, -dP },
    { -dP, dP },
  };
  representativePoints = new HashMap<Integer, List<VectorWritable>>();
  for (Cluster current : clusters.values()) {
    List<VectorWritable> reps = new ArrayList<VectorWritable>();
    representativePoints.put(current.getId(), reps);
    reps.add(new VectorWritable(current.getCenter().clone()));
    for (int corner = 0; corner < cornerOffsets.length; corner++) {
      DenseVector offset = new DenseVector(cornerOffsets[corner]);
      reps.add(new VectorWritable(current.getCenter().plus(offset)));
    }
  }
}
 
Example #9
Source File: 1000021_TestCDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * Dumps the representative points of every iteration to standard output.
 *
 * <p>For each i in {@code 0..numIterations} the directory "representativePoints-i" below the
 * test "output" directory is listed, and every file whose name does not start with a dot is
 * read as a sequence file of (cluster id, vector) records.
 *
 * @param numIterations the last iteration index, inclusive
 * @throws IOException if a directory cannot be listed or a file cannot be read
 */
private void checkRefPoints(int numIterations) throws IOException {
  for (int i = 0; i <= numIterations; i++) {
    Path iterationDir = new Path(getTestTempDirPath("output"), "representativePoints-" + i);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus entry : fs.listStatus(iterationDir)) {
      Path candidate = entry.getPath();
      boolean hidden = candidate.getName().startsWith(".");
      if (!hidden) {
        SequenceFile.Reader in = new SequenceFile.Reader(fs, candidate, conf);
        try {
          Writable id = new IntWritable(0);
          VectorWritable vector = new VectorWritable();
          while (in.next(id, vector)) {
            System.out.println("\tC-" + id + ": " + AbstractCluster.formatVector(vector.get(), null));
          }
        } finally {
          in.close();
        }
      }
    }
  }
}
 
Example #10
Source File: Step5.java    From recsys-offline with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the single-reducer job "wiki  job five": sequence-file input from
 * {@code INPUT_PATH}, mapper/combiner/reducer from this step, output to {@code OUTPUT_PATH}.
 * The output format class is not set here. The JVM exits with status 1 if the job fails.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration configuration = new Configuration();
    Job job = new Job(configuration, "wiki  job five");
    job.setNumReduceTasks(1);
    job.setJarByClass(Step5.class);

    // Map side: user id -> partial recommendation vector.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(WikiMapper5.class);
    job.setMapOutputKeyClass(VarLongWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    // Combine and reduce side: user id -> recommended items.
    job.setCombinerClass(WiKiCombiner5.class);
    job.setReducerClass(WiKiReducer5.class);
    job.setOutputKeyClass(VarLongWritable.class);
    job.setOutputValueClass(RecommendedItemsWritable.class);

    SequenceFileInputFormat.addInputPath(job, new Path(INPUT_PATH));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1); // the job failed
    }
}
 
Example #11
Source File: 1000021_CDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Computes the average per-dimension standard deviation of the representative points of one
 * cluster and stores it in {@code stDevs} under the cluster id.
 *
 * <p>With n points, s1 the element-wise sum and s2 the element-wise sum of squares, the
 * per-dimension value is {@code sqrt(n * s2 - s1 * s1) / n}; the stored figure is the mean of
 * those values over all dimensions.
 *
 * <p>A cluster that has no representative points (missing or empty list) is recorded with a
 * deviation of {@code 0.0}. Previously that case failed with a NullPointerException because
 * the running sums were never initialized.
 *
 * @param cI the id of the cluster whose deviation is to be recorded
 */
private void setStDev(int cI) {
  List<VectorWritable> repPts = representativePoints.get(cI);
  if (repPts == null || repPts.isEmpty()) {
    // NOTE(review): 0.0 assumes "no points" should mean "no spread" -- confirm with callers.
    stDevs.put(cI, 0.0);
    return;
  }
  int s0 = 0;
  Vector s1 = null;
  Vector s2 = null;
  for (VectorWritable vw : repPts) {
    s0++;
    Vector v = vw.get();
    // The first point seeds both accumulators; later points are added to them.
    s1 = s1 == null ? v.clone() : s1.plus(v);
    s2 = s2 == null ? v.times(v) : s2.plus(v.times(v));
  }
  Vector std = s2.times(s0).minus(s1.times(s1)).assign(new SquareRootFunction()).divide(s0);
  double d = std.zSum() / std.size();
  stDevs.put(cI, d);
}
 
Example #12
Source File: 1000021_CDbwEvaluator_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * Returns the fraction of the representative points of clusters {@code cI} and {@code cJ}
 * that lie within a radius of {@code uIJ}, the radius being the mean of the two clusters'
 * stored standard deviations.
 *
 * @param uIJ the point around which the density is measured
 * @param cI the id of the first cluster
 * @param cJ the id of the second cluster
 * @return the number of points within the radius divided by the total number of
 *         representative points of both clusters
 */
double interDensity(Vector uIJ, int cI, int cJ) {
  List<VectorWritable> pointsI = representativePoints.get(cI);
  List<VectorWritable> pointsJ = representativePoints.get(cJ);
  double hits = 0.0;
  double radius = (stDevs.get(cI) + stDevs.get(cJ)) / 2.0;
  for (VectorWritable candidate : pointsI) {
    hits += measure.distance(uIJ, candidate.get()) <= radius ? 1.0 : 0.0;
  }
  for (VectorWritable candidate : pointsJ) {
    hits += measure.distance(uIJ, candidate.get()) <= radius ? 1.0 : 0.0;
  }
  int total = pointsI.size() + pointsJ.size();
  return hits / total;
}
 
Example #13
Source File: 1000021_CDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * Records the average per-dimension standard deviation of a cluster's representative points
 * in {@code stDevs}, provided the cluster has more than one such point.
 *
 * <p>With n points, the element-wise sum s1 and the element-wise sum of squares s2, each
 * dimension contributes {@code sqrt(n * s2 - s1 * s1) / n}; the mean over all dimensions is
 * stored. Clusters with zero or one point get no entry.
 *
 * @param cI the id of the cluster to process
 */
private void setStDev(int cI) {
  List<VectorWritable> repPts = representativePoints.get(cI);
  int count = 0;
  Vector sum = null;
  Vector sumOfSquares = null;
  for (VectorWritable writable : repPts) {
    count++;
    Vector point = writable.get();
    if (sum == null) {
      sum = point.clone();
    } else {
      sum = sum.plus(point);
    }
    if (sumOfSquares == null) {
      sumOfSquares = point.times(point);
    } else {
      sumOfSquares = sumOfSquares.plus(point.times(point));
    }
  }
  if (count <= 1) {
    return;
  }
  Vector scaledSquares = sumOfSquares.times(count);
  Vector squaredSum = sum.times(sum);
  Vector std = scaledSquares.minus(squaredSum).assign(new SquareRootFunction()).divide(count);
  double average = std.zSum() / std.size();
  stDevs.put(cI, average);
}
 
Example #14
Source File: 1000021_CDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * Measures how densely the representative points of two clusters surround {@code uIJ}.
 *
 * <p>A point counts when its distance to {@code uIJ} is at most the mean of
 * {@code getStdev(cI)} and {@code getStdev(cJ)}.
 *
 * @param uIJ the point around which the density is measured
 * @param cI the id of the first cluster
 * @param cJ the id of the second cluster
 * @return the count of qualifying points divided by the combined number of representative
 *         points of both clusters
 */
double interDensity(Vector uIJ, int cI, int cJ) {
  List<VectorWritable> firstReps = representativePoints.get(cI);
  List<VectorWritable> secondReps = representativePoints.get(cJ);
  double inside = 0.0;
  double threshold = (getStdev(cI) + getStdev(cJ)) / 2.0;
  for (VectorWritable rep : firstReps) {
    double distance = measure.distance(uIJ, rep.get());
    if (distance <= threshold) {
      inside += 1.0;
    }
  }
  for (VectorWritable rep : secondReps) {
    double distance = measure.distance(uIJ, rep.get());
    if (distance <= threshold) {
      inside += 1.0;
    }
  }
  return inside / (firstReps.size() + secondReps.size());
}
 
Example #15
Source File: 1000021_CDbwEvaluator_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * Returns the minimum distance between representative points of any two different clusters,
 * divided by {@code 1.0 + interClusterDensity()}.
 *
 * @return the scaled minimum cross-cluster distance; if there is no pair of points from
 *         different clusters the unscaled minimum is {@code Double.MAX_VALUE}
 */
public double separation() {
  double best = Double.MAX_VALUE;
  for (Map.Entry<Integer, List<VectorWritable>> left : representativePoints.entrySet()) {
    Integer leftId = left.getKey();
    List<VectorWritable> leftPoints = left.getValue();
    for (Map.Entry<Integer, List<VectorWritable>> right : representativePoints.entrySet()) {
      boolean sameCluster = leftId.equals(right.getKey());
      if (sameCluster) {
        continue;
      }
      List<VectorWritable> rightPoints = right.getValue();
      for (VectorWritable a : leftPoints) {
        for (VectorWritable b : rightPoints) {
          double d = measure.distance(a.get(), b.get());
          best = d < best ? d : best;
        }
      }
    }
  }
  double denominator = 1.0 + interClusterDensity();
  return best / denominator;
}
 
Example #16
Source File: Step2.java    From recsys-offline with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the single-reducer job "wiki  job two", reading sequence files from
 * {@code INPUT_PATH} and writing sequence files of (IntWritable, VectorWritable) records to
 * {@code OUTPUT_PATH}. The JVM exits with status 1 if the job fails.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration configuration = new Configuration();
    Job job = new Job(configuration, "wiki  job two");
    job.setNumReduceTasks(1);
    job.setJarByClass(Step2.class);

    // Map side: pairs of item indexes.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(WikiMapper2.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce side: one vector per item index.
    job.setReducerClass(WiKiReducer2.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileInputFormat.addInputPath(job, new Path(INPUT_PATH));
    SequenceFileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1); // the job failed
    }
}
 
Example #17
Source File: Step1.java    From recsys-offline with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs the single-reducer job "step1", which reads {@code INPUT_PATH} and
 * writes sequence files of (VarLongWritable, VectorWritable) records to {@code OUTPUT_PATH}.
 * The JVM exits with status 1 if the job fails.
 *
 * @param args unused
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    Job job = new Job(configuration, "step1");
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(1);
    job.setJarByClass(Step1.class);

    // Map side: user id -> (item, preference) pair.
    job.setMapperClass(WikiMapper1.class);
    job.setMapOutputKeyClass(VarLongWritable.class);
    job.setMapOutputValueClass(LongAndFloat.class);

    // Reduce side: user id -> preference vector.
    job.setReducerClass(WiKiReducer1.class);
    job.setOutputKeyClass(VarLongWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
    SequenceFileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1);
    }
}
 
Example #18
Source File: Step32.java    From recsys-offline with Apache License 2.0 6 votes vote down vote up
/**
 * Emits one (item index, user preference) record for every non-zero entry of a user vector,
 * and echoes each emitted record to standard output.
 *
 * @param key the user id
 * @param value the user's vector, indexed by item
 * @param context the context the records are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(VarLongWritable key,VectorWritable value,Context context) throws IOException, InterruptedException{
    long userID = key.get();
    Vector userVector = value.get();
    // A single key instance is refilled for every emitted record.
    IntWritable itemKey = new IntWritable();
    for (Vector.Element entry : userVector.nonZeroes()) {
        int itemIndex = entry.index();
        float preferenceValue = (float) entry.get();
        itemKey.set(itemIndex);
        context.write(itemKey, new VectorOrPrefWritable(userID, preferenceValue));
        System.out.println("item :"+itemKey+",userand val:"+userID+","+preferenceValue);
    }
}
 
Example #19
Source File: Step5.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * For every (user id, preference value) pair attached to the input, emits the input vector
 * multiplied by that preference value, keyed by the user id. Each emitted record is also
 * echoed to standard output.
 *
 * @param key the input key (not used)
 * @param vectorAndPref the vector together with the parallel lists of user ids and values
 * @param context the context the scaled vectors are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(IntWritable key,VectorAndPrefsWritable vectorAndPref,Context context) throws IOException, InterruptedException{
    Vector column = vectorAndPref.getVector();
    List<Long> users = vectorAndPref.getUserIDs();
    List<Float> preferences = vectorAndPref.getValues();
    // The two lists are walked in lockstep by position.
    for (int position = 0; position < users.size(); position++) {
        long userID = users.get(position);
        float weight = preferences.get(position);
        Vector scaled = column.times(weight);
        VarLongWritable outKey = new VarLongWritable(userID);
        context.write(outKey, new VectorWritable(scaled));
        System.out.println(",userid:"+userID+",vector:"+scaled);
    }
}
 
Example #20
Source File: 1000021_TestCDbwEvaluator_s.java    From coming with MIT License 5 votes vote down vote up
/**
 * Runs the superclass set-up, then writes the {@code REFERENCE} sample points to
 * "testdata/file1" in the test's temporary area.
 */
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  Configuration configuration = new Configuration();
  FileSystem fileSystem = FileSystem.get(configuration);
  // Test input consumed by the clustering jobs.
  List<VectorWritable> referencePoints = TestKmeansClustering.getPointsWritable(REFERENCE);
  ClusteringTestUtils.writePointsToFile(
      referencePoints, getTestTempFilePath("testdata/file1"), fileSystem, configuration);
}
 
Example #21
Source File: Step5.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Adds up all vectors received for one key, writes the sum under the same key and echoes it
 * to standard error.
 *
 * @param key the key shared by all values
 * @param values the vectors to add up
 * @param context the context the sum is written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(VarLongWritable key, Iterable<VectorWritable> values,Context context) throws IOException, InterruptedException{
    Vector sum = null;
    for (VectorWritable value : values) {
        Vector current = value.get();
        if (sum == null) {
            // The first vector starts the sum.
            sum = current;
        } else {
            sum = sum.plus(current);
        }
    }
    context.write(key, new VectorWritable(sum));
    System.err.println("userid:"+key.toString()+",vecotr:"+sum);
}
 
Example #22
Source File: Step5.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Sums the vectors received for one user and writes the highest-valued entries, at most
 * {@code recommendationsPerUser} of them, as that user's recommendations.
 *
 * <p>Entries for which {@code hasItem(userID, index)} is true are left out. Candidates are
 * held in a priority queue ordered by the reverse of {@code ByValueRecommendedItemComparator};
 * once the queue is full, a new entry is admitted only if its value exceeds that of the
 * queue head, which is then removed.
 *
 * <p>NOTE(review): the user id is narrowed from {@code long} to {@code int} before the
 * {@code hasItem} lookup -- confirm ids always fit in an int.
 *
 * @param key the user id
 * @param values the vectors to add up for this user
 * @param context the context the recommendations are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(VarLongWritable key, Iterable<VectorWritable> values,Context context) throws IOException, InterruptedException{
    int userID = (int) key.get();
    Vector scores = null;
    for (VectorWritable partial : values) {
        Vector current = partial.get();
        scores = (scores == null) ? current : scores.plus(current);
    }

    Queue<RecommendedItem> best = new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1,
            Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance()));
    for (Vector.Element entry : scores.nonZeroes()) {
        int index = entry.index();
        System.out.println("Vecotr.element.indxe:"+index);
        if (hasItem(userID, String.valueOf(index))) {
            continue;
        }
        float value = (float) entry.get();
        if (best.size() < recommendationsPerUser) {
            // Only the index and value are stored on the item.
            best.add(new GenericRecommendedItem(index, value));
        } else if (value > best.peek().getValue()) {
            best.add(new GenericRecommendedItem(index, value));
            best.poll();
        }
    }

    List<RecommendedItem> ranked = new ArrayList<RecommendedItem>(best.size());
    ranked.addAll(best);
    Collections.sort(ranked, ByValueRecommendedItemComparator.getInstance());
    context.write(key, new RecommendedItemsWritable(ranked));
}
 
Example #23
Source File: Step2.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Emits every ordered pair (index1, index2) of non-zero indexes of the user vector, including
 * the pairs where both indexes are equal. Each outer index is also echoed to standard error.
 *
 * @param userID the user id (not used)
 * @param userVector the user's vector
 * @param context the context the index pairs are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(VarLongWritable userID,VectorWritable userVector,Context context) throws IOException, InterruptedException{
    Vector items = userVector.get();
    for (Vector.Element first : items.nonZeroes()) {
        int index1 = first.index();
        System.err.println("index1:"+index1);
        // A fresh pass over the same vector pairs index1 with every non-zero index.
        for (Vector.Element second : items.nonZeroes()) {
            int index2 = second.index();
            context.write(new IntWritable(index1), new IntWritable(index2));
        }
    }
}
 
Example #24
Source File: AdmmIterationMapper.java    From laser with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the vector of every record into {@code inputSplitData}, except records whose key
 * contains "|clustering". When {@code addIntercept} is set, element 0 of the vector is
 * overwritten with 1.0 before it is stored.
 *
 * @param key the record key
 * @param value the record vector
 * @param context the mapper context (not used)
 */
protected void map(Text key, VectorWritable value, Context context)
		throws IOException, InterruptedException {
	boolean perClusteringRecord = key.toString().contains("|clustering");
	if (!perClusteringRecord) {
		Vector features = value.get();
		if (addIntercept) {
			features.set(0, 1.0);
		}
		inputSplitData.add(features);
	}
}
 
Example #25
Source File: LaserMessageConsumer.java    From laser with Apache License 2.0 5 votes vote down vote up
/**
 * Closes the current offline writer, advances the offline version and opens a new offline
 * writer for the new version.
 *
 * <p>The replacement writer is created under {@code OFFLINE_FOLDER}, the same folder the
 * constructor uses for the offline writer and the same folder as the returned path.
 * Previously it was created under {@code ONLINE_FOLDER}, so offline data written after the
 * first call landed in the online folder.
 *
 * <p>NOTE(review): the monitor used here is the writer instance that this method replaces,
 * so callers synchronizing on {@code offlineWriter} before and after a call lock different
 * objects -- confirm whether a dedicated lock object is needed.
 *
 * @return the path of the offline file that was just closed
 * @throws IOException if closing the old writer or creating the new one fails
 */
public Path nextOfflinePath() throws IOException {
	synchronized (offlineWriter) {
		offlineWriter.close();
		Path ret = new Path(output, OFFLINE_FOLDER + "/"
				+ Long.toString(offlineVersion));
		offlineVersion++;
		Path offlinePath = new Path(output, OFFLINE_FOLDER + "/"
				+ Long.toString(offlineVersion));
		LOG.info("Update offline feature output path, to {}", offlinePath);

		offlineWriter = SequenceFile.createWriter(fs, conf, offlinePath,
				Text.class, VectorWritable.class);
		return ret;
	}
}
 
Example #26
Source File: Step2.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Counts how often each item index occurs among the values of one key and writes the counts
 * as a sparse vector under that key. The key and vector are also echoed to standard output.
 *
 * @param itemIndex1 the key shared by all values
 * @param itemPrefs the item indexes to count
 * @param context the context the count vector is written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(IntWritable itemIndex1,Iterable<IntWritable> itemPrefs,Context context) throws IOException, InterruptedException{
    // Arguments are (cardinality, initial capacity).
    Vector counts = new RandomAccessSparseVector(Integer.MAX_VALUE, 10);
    for (IntWritable occurrence : itemPrefs) {
        int column = occurrence.get();
        double seenSoFar = counts.get(column);
        counts.set(column, seenSoFar + 1.0);
    }
    context.write(itemIndex1, new VectorWritable(counts));
    System.out.println(itemIndex1+"  ,"+counts);
}
 
Example #27
Source File: Step1.java    From recsys-offline with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one sparse vector per user from its (item, preference) pairs and writes it under the
 * user id. The item becomes the vector index and the preference the value stored there; both
 * are obtained by parsing the string form of the pair's components.
 *
 * @param userID the user id
 * @param itemPrefs the (item, preference) pairs of this user
 * @param context the context the user vector is written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(VarLongWritable userID,   Iterable<LongAndFloat> itemPrefs, Context context)   throws IOException, InterruptedException {
    Vector preferences = new RandomAccessSparseVector(Integer.MAX_VALUE, 10);
    for (LongAndFloat pair : itemPrefs) {
        int itemIndex = Integer.parseInt(pair.getFirst().toString());
        float rating = Float.parseFloat(pair.getSecond().toString());
        preferences.set(itemIndex, rating);
    }
    VectorWritable output = new VectorWritable(preferences);
    context.write(userID, output);
}
 
Example #28
Source File: 1000021_CDbwEvaluator_s.java    From coming with MIT License 5 votes vote down vote up
/**
 * For testing only. Stores the supplied data and immediately computes the standard deviation
 * entry of every cluster that has an entry in {@code representativePoints}.
 *
 * @param representativePoints
 *            a {@code Map<Integer,List<VectorWritable>>} of representative points keyed by clusterId
 * @param clusters
 *            a {@code Map<Integer,Cluster>} of the clusters keyed by clusterId
 * @param measure
 *            an appropriate DistanceMeasure
 */
public CDbwEvaluator(Map<Integer, List<VectorWritable>> representativePoints,
                     Map<Integer, Cluster> clusters,
                     DistanceMeasure measure) {
  this.measure = measure;
  this.clusters = clusters;
  this.representativePoints = representativePoints;
  // One pass over the cluster ids that have representative points.
  for (Integer clusterId : representativePoints.keySet()) {
    setStDev(clusterId);
  }
}
 
Example #29
Source File: 1000021_CDbwReducer_s.java    From coming with MIT License 5 votes vote down vote up
/**
 * Writes, for one key, the vector of the value with the greatest weight; on ties the value
 * seen first wins. The original code describes this value as the most distant point.
 *
 * @param key the key shared by all values
 * @param values the weighted vectors to choose from
 * @param context the context the chosen vector is written to
 */
@Override
protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context) throws IOException,
    InterruptedException {
  WeightedVectorWritable farthest = null;
  for (WeightedVectorWritable candidate : values) {
    boolean heavier = farthest == null || farthest.getWeight() < candidate.getWeight();
    if (heavier) {
      // Keep a separate holder rather than the iterated instance itself.
      farthest = new WeightedVectorWritable(candidate.getWeight(), candidate.getVector());
    }
  }
  IntWritable outKey = new IntWritable(key.get());
  context.write(outKey, new VectorWritable(farthest.getVector()));
}
 
Example #30
Source File: 1000021_CDbwReducer_t.java    From coming with MIT License 5 votes vote down vote up
/**
 * Writes every representative point held in {@code representativePoints}, keyed by its
 * cluster id, and then runs the superclass cleanup.
 *
 * @param context the context the points are written to
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Map.Entry<Integer, List<VectorWritable>> clusterEntry : representativePoints.entrySet()) {
    // One key instance is shared by all points of the same cluster.
    IntWritable clusterKey = new IntWritable(clusterEntry.getKey());
    List<VectorWritable> points = clusterEntry.getValue();
    for (VectorWritable point : points) {
      context.write(clusterKey, point);
    }
  }
  super.cleanup(context);
}