org.apache.pig.data.DataBag Java Examples

The following examples show how to use org.apache.pig.data.DataBag. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Distinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Collects every tuple found in the nested bags of the input into one result bag.
 *
 * <p>The first field of {@code input} is read as a bag whose tuples each carry a bag in
 * their first column. All tuples of those inner bags are added to a bag obtained from
 * {@code createDataBag()} (presumably a distinct bag, given the method name — confirm
 * against that helper). Progress is reported after every 1000 added tuples.
 *
 * @param input    tuple whose first field is the outer bag, or {@code null}
 * @param evalFunc function used only to report progress
 * @return the populated bag; empty when the outer bag is {@code null}
 * @throws IOException if reading a field fails
 */
static private DataBag getDistinctFromNestedBags(Tuple input, EvalFunc evalFunc) throws IOException {
    DataBag collected = createDataBag();
    DataBag outerBag = (DataBag) input.get(0);
    if (outerBag == null) {
        return collected;
    }

    long addedSoFar = 0;
    for (Tuple wrapper : outerBag) {
        // Every wrapper tuple carries a single column holding an inner bag.
        DataBag innerBag = (DataBag) wrapper.get(0);
        for (Tuple element : innerBag) {
            collected.add(element);
            addedSoFar++;
            if (addedSoFar % 1000 == 0) {
                evalFunc.progress();
            }
        }
    }
    return collected;
}
 
Example #2
Source File: TestBagFormat.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code BagFormat.format} renders a bag holding a plain tuple and a tuple
 * that wraps a nested bag as {@code {(12),({(12)})}}.
 */
@Test
public void testBagFormat() throws Exception {
    final BagFactory bagFactory = BagFactory.getInstance();
    final TupleFactory tupleFactory = TupleFactory.getInstance();

    // A one-field tuple holding the integer 12.
    Tuple leaf = tupleFactory.newTuple(1);
    leaf.set(0, 12);

    // A one-field tuple whose field is a bag containing the leaf tuple.
    DataBag nested = bagFactory.newDefaultBag();
    nested.add(leaf);
    Tuple wrapper = tupleFactory.newTuple(1);
    wrapper.set(0, nested);

    DataBag outer = bagFactory.newDefaultBag();
    outer.add(leaf);
    outer.add(wrapper);

    System.out.println(BagFormat.format(outer));
    assertEquals("{(12),({(12)})}", BagFormat.format(outer));
}
 
Example #3
Source File: DoubleAvg.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the average from the combined (sum, count) tuple produced by {@code combine}.
 *
 * @param input tuple whose first field is the bag handed to {@code combine}
 * @return sum divided by count, or {@code null} when the sum is {@code null} or the
 *         count is not positive
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        if (sum == null) {
            return null;
        }
        // Unboxing a null count throws here and is reported through the generic handler below.
        double count = (Long) combined.get(1);

        if (count > 0) {
            // Double.valueOf replaces the deprecated Double(double) constructor.
            return Double.valueOf(sum / count);
        }
        return null;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #4
Source File: TestPigStreaming.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a tuple holding a bag of two 2-field tuples and checks the produced bytes
 * against {@code "{(A,B),(1,2)}\n"}.
 */
@Test
public void testSerialize__bag() throws IOException {
    Tuple letters = tf.newTuple(2);
    letters.set(0, "A");
    letters.set(1, "B");

    Tuple numbers = tf.newTuple(2);
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    Tuple wrapper = tf.newTuple(1);
    wrapper.set(0, DefaultBagFactory.getInstance().newDefaultBag(contents));

    byte[] expected = "{(A,B),(1,2)}\n".getBytes();
    byte[] actual = ps.serialize(wrapper);
    Assert.assertArrayEquals(expected, actual);
}
 
Example #5
Source File: TestMapReduce.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a bag of 2-field tuples: for every tuple in the input bag, the output tuple
 * holds {@code field0} followed by the string form of the input tuple's first field.
 *
 * @param input tuple whose first field is converted to a bag via {@code DataType.toBag}
 * @return the bag of generated pairs
 * @throws IOException wrapping any {@code ExecException} raised while reading or writing fields
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag pairs = BagFactory.getInstance().newDefaultBag();
        for (Tuple source : DataType.toBag(input.get(0))) {
            Tuple pair = TupleFactory.getInstance().newTuple(2);
            pair.set(0, field0);
            pair.set(1, source.get(0).toString());
            pairs.add(pair);
        }
        return pairs;
    } catch (ExecException ee) {
        IOException wrapped = new IOException(ee.getMessage());
        wrapped.initCause(ee);
        throw wrapped;
    }
}
 
Example #6
Source File: IteratingAccumulatorEvalFunc.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Offers every tuple of the input bag to {@code queue}, retrying each offer until it is
 * accepted, {@code isFinished} becomes true, or {@code exceptionThrown} becomes true.
 *
 * <p>An {@link InterruptedException} raised while waiting on the queue does not abort the
 * retry loop, but the interruption is remembered and the thread's interrupt status is
 * restored before this method returns, so the request is not lost to the caller.
 *
 * @param input tuple whose first field is the bag of tuples to enqueue
 * @throws IOException if reading the input fails
 * @throws RuntimeException wrapping {@code executionThreadException} when
 *         {@code exceptionThrown} is set
 */
@Override
public void accumulate(Tuple input) throws IOException {
    if (!isInitialized) {
        initialize();
    }

    boolean interrupted = false;
    try {
        for (Tuple t : (DataBag) input.get(0)) {
            if (isFinished) {
                return;
            }

            boolean added = false;
            while (!isFinished && !added && !exceptionThrown) {
                try {
                    added = queue.offer(t, WAIT_TO_OFFER, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    // Keep retrying, but remember the interrupt so it can be restored on exit.
                    interrupted = true;
                }
            }

            if (exceptionThrown) {
                throw new RuntimeException("Exception thrown in thread: ", executionThreadException);
            }
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
 
Example #7
Source File: TestConversions.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a generated bag of float tuples (100 rows, 5 columns) to its string form,
 * converts it back with the load caster using the schema from
 * {@code GenRandomData.getFloatDataBagFieldSchema}, and checks that every converted field
 * is an {@code Integer} equal to the original float's {@code intValue()}.
 */
@Test
public void testBytesToBagWithConversion() throws IOException {
    DataBag original = GenRandomData.genFloatDataBag(r,5,100);
    ResourceFieldSchema schema = GenRandomData.getFloatDataBagFieldSchema(5);
    DataBag converted = ps.getLoadCaster().bytesToBag(original.toString().getBytes(), schema);

    Iterator<Tuple> originalRows = original.iterator();
    Iterator<Tuple> convertedRows = converted.iterator();
    for (int row = 0; row < 100; row++) {
        Tuple originalRow = originalRows.next();
        assertTrue(convertedRows.hasNext());
        Tuple convertedRow = convertedRows.next();
        for (int col = 0; col < 5; col++) {
            Object actual = convertedRow.get(col);
            assertTrue(actual instanceof Integer);
            Integer expectedValue = ((Float) originalRow.get(col)).intValue();
            assertEquals(expectedValue, actual);
        }
    }
}
 
Example #8
Source File: LineageTrimmingVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a visitor that walks {@code plan} with a {@code PreOrderDepthFirstWalker},
 * stores the supplied collaborators, and then runs {@code init()}.
 *
 * @param plan        logical plan to visit
 * @param baseData    bag per load operator; stored by reference, not copied
 * @param eg          example generator
 * @param LogToPhyMap mapping from logical operators to physical operators
 * @param physPlan    physical plan
 * @param pc          Pig context
 */
public LineageTrimmingVisitor(LogicalPlan plan,
        Map<LOLoad, DataBag> baseData,
        ExampleGenerator eg,
        Map<Operator, PhysicalOperator> LogToPhyMap,
        PhysicalPlan physPlan, PigContext pc) throws IOException, InterruptedException {
    super(plan, new PreOrderDepthFirstWalker(plan));

    // Plans and their operator mapping.
    this.plan = plan;
    this.physPlan = physPlan;
    this.LogToPhyMap = LogToPhyMap;

    // Data and context shared with the caller.
    this.baseData = baseData;
    this.eg = eg;
    this.pc = pc;

    this.inputToDataMap = new HashMap<FileSpec, DataBag>();

    // Runs last so that every field above is already assigned.
    init();
}
 
Example #9
Source File: INVERSEMAP.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Inverts a map: each value's string form becomes a key, and the bag stored under that
 * key collects one single-field tuple per original key that mapped to it.
 *
 * @param original map to invert
 * @return map from value string to the bag of original keys
 * @throws ExecException if a value is a {@code Tuple} or a {@code DataBag}
 */
private HashMap<String, DataBag> doInverse(Map<String,Object> original) throws ExecException {
    final HashMap<String, DataBag> inverted = new HashMap<String, DataBag>(original.size());

    for (Map.Entry<String, Object> pair : original.entrySet()) {
        Object value = pair.getValue();

        // Only values that are neither tuples nor bags can be turned into keys.
        if (value instanceof Tuple || value instanceof DataBag) {
            throw new ExecException("Wrong type. Value is of type " + value.getClass());
        }
        String invertedKey = value.toString();

        // Lazily create the bag for a key seen for the first time.
        DataBag originalKeys = inverted.get(invertedKey);
        if (originalKeys == null) {
            originalKeys = new NonSpillableDataBag();
            inverted.put(invertedKey, originalKeys);
        }
        originalKeys.add(TUPLE_FACTORY.newTuple(pair.getKey()));
    }
    return inverted;
}
 
Example #10
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code BagToString} on a bag of two 2-field tuples without passing a delimiter and
 * expects the fields to be joined as {@code a_5_c_6}.
 */
@Test
public void testUseDefaultDelimiterBagToStringUDF() throws Exception {
    TupleFactory tupleFactory = TupleFactory.getInstance();

    Tuple first = tupleFactory.newTuple(2);
    first.set(0, "a");
    first.set(1, 5);

    Tuple second = tupleFactory.newTuple(2);
    second.set(0, "c");
    second.set(1, 6);

    DataBag pairs = BagFactory.getInstance().newDefaultBag();
    pairs.add(first);
    pairs.add(second);

    // The UDF receives the bag as the only field of its input tuple.
    Tuple udfInput = tupleFactory.newTuple(1);
    udfInput.set(0, pairs);

    String joined = new BagToString().exec(udfInput);

    assertEquals("a_5_c_6", joined);
}
 
Example #11
Source File: ReservoirSample.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Clears the reservoir, feeds it every sample found in the nested bags of the input, and
 * returns the tuples the reservoir holds afterwards.
 *
 * @param input tuple whose first field is a bag of tuples, each carrying a bag of
 *              intermediate sample tuples in its first field
 * @return bag of the original tuples taken from the reservoir's scored tuples
 */
@Override
public DataBag exec(Tuple input) throws IOException {
  getReservoir().clear();

  for (Tuple wrapper : (DataBag) input.get(0)) {
    for (Tuple intermediate : (DataBag) wrapper.get(0)) {
      // The intermediate tuple is turned back into a scored tuple rather than re-scored.
      getReservoir().consider(ScoredTuple.fromIntermediateTuple(intermediate));
    }
  }

  DataBag selected = BagFactory.getInstance().newDefaultBag();
  for (ScoredTuple kept : getReservoir()) {
    // Emit the wrapped tuple, not the scored wrapper.
    selected.add(kept.getTuple());
  }
  return selected;
}
 
Example #12
Source File: JoinPackager.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the key, bags and read-once flags, and replaces every read-once bag except the
 * last one with a materialized copy obtained from {@code getBag()}.
 *
 * @param key      key for the attached input
 * @param bags     one bag per input; entries may be replaced in place
 * @param readOnce per-input flag marking bags that can only be read once
 * @throws ExecException if the input at index {@code numInputs - 1} is not read-once
 */
@Override
public void attachInput(Object key, DataBag[] bags, boolean[] readOnce)
        throws ExecException {
    checkBagType();

    this.key = key;
    this.bags = bags;
    this.readOnce = readOnce;

    // Every bag but the last one has to be materialized.
    final int lastIndex = bags.length - 1;
    for (int idx = 0; idx < lastIndex; idx++) {
        if (!readOnce[idx]) {
            continue;
        }
        DataBag copy = getBag();
        copy.addAll(bags[idx]);
        bags[idx] = copy;
    }

    if (!readOnce[numInputs - 1]) {
        throw new ExecException(
                "JoinPackager expects the last input to be streamed");
    }
    this.newKey = true;
}
 
Example #13
Source File: AVG.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the average from the combined (sum, count) tuple produced by {@code combine}.
 *
 * @param input tuple whose first field is the bag handed to {@code combine}
 * @return sum divided by count, or {@code null} when the sum is {@code null} or the
 *         count is not positive
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        if (sum == null) {
            return null;
        }
        // Unboxing a null count throws here and is reported through the generic handler below.
        double count = (Long) combined.get(1);

        if (count > 0) {
            // Double.valueOf replaces the deprecated Double(double) constructor.
            return Double.valueOf(sum / count);
        }
        return null;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #14
Source File: StringMax.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the first field of the first tuple of the input bag in a new tuple.
 *
 * @param input tuple whose first field is a bag; only the bag's first tuple is read
 * @return a tuple holding that tuple's first field as a string, or holding {@code null}
 *         when the bag is empty
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        // input is a bag with one tuple containing
        // the column we are trying to max on
        DataBag bg = (DataBag) input.get(0);
        String s = null;
        // A single iteration is used so the bag is asked for only one iterator
        // (the previous code created one for hasNext() and another for next()).
        for (Tuple tp : bg) {
            s = (String) (tp.get(0));
            break; // only the first tuple is needed
        }
        return tfact.newTuple(s);
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing max in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #15
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a bag with one single-field tuple per n-gram returned by {@code makeNGrams}
 * for the string form of the input's first field.
 *
 * <p>A {@code null} first field yields an empty bag. Previously the null check came after
 * {@code toString()} had already been invoked, so it could never take effect and a null
 * field raised a {@code NullPointerException} instead.
 *
 * @param input tuple whose first field supplies the title text
 * @return bag of n-gram tuples; empty when the field is {@code null}
 * @throws IOException wrapping any {@code ExecException} raised while reading or writing fields
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag output = BagFactory.getInstance().newDefaultBag();

        // Check the raw field for null before converting it to a string.
        Object field = input.get(0);
        String title = (field == null) ? null : field.toString();

        if (title != null) {
            for (String nGram : makeNGrams(title)) {
                Tuple t = TupleFactory.getInstance().newTuple(1);
                t.set(0, nGram);
                output.add(t);
            }
        }

        return output;
    } catch (ExecException ee) {
        IOException ioe = new IOException(ee.getMessage());
        ioe.initCause(ee);
        throw ioe;
    }
}
 
Example #16
Source File: VALUELIST.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the values of the input map as a bag of single-field tuples.
 *
 * @param input tuple whose first field is a map
 * @return a bag with one tuple per map value, or {@code null} when the input is
 *         {@code null}, has no fields, or its first field is {@code null}
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // The original author notes that the map type is verified at compile time.
    Map<String, Object> map = (Map<String, Object>) input.get(0);
    if (map == null) {
        return null;
    }

    Collection<Object> values = map.values();
    DataBag valueBag = new NonSpillableDataBag(values.size());
    for (Object value : values) {
        valueBag.add(TUPLE_FACTORY.newTuple(value));
    }
    return valueBag;
}
 
Example #17
Source File: JrubyAlgebraicEvalFunc.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the input bag as a {@code RubyDataBag}, calls the method named by
 * {@code getStage()} on {@code getReceiver()} through the Ruby engine, and returns the
 * converted result in a new tuple.
 *
 * @param input tuple whose first field is the bag passed to the Ruby method
 * @return tuple holding the Ruby result converted by {@code PigJrubyLibrary.rubyToPig}
 * @throws IOException wrapping any exception raised during the call or the conversion
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (!isInitialized()) {
        initialize();
    }

    try {
        RubyDataBag rubyBag = new RubyDataBag(ruby, ruby.getClass("DataBag"), (DataBag) input.get(0));
        IRubyObject outcome = rubyEngine.callMethod(getReceiver(), getStage(), rubyBag, IRubyObject.class);
        return mTupleFactory.newTuple(PigJrubyLibrary.rubyToPig(outcome));
    } catch (Exception e) {
        throw new IOException("Error executing intermediate function: ",  e);
    }
}
 
Example #18
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a bag holding one tuple of two maps with {@code BinStorage}, loads it back, and
 * checks map lookups ({@code $0#'apple'} and {@code $1#'orange'}) in a foreach.
 */
@Test
public void testMapLookup() throws Exception {
    DataBag b = BagFactory.getInstance().newDefaultBag();
    Map<String, Object> colors = new HashMap<String, Object>();
    colors.put("apple","red");
    colors.put("orange","orange");

    Map<String, Object> weights = new HashMap<String, Object>();
    weights.put("apple","0.1");
    weights.put("orange","0.3");

    Tuple t = mTf.newTuple();
    t.append(colors);
    t.append(weights);
    b.add(t);

    File tempF = File.createTempFile("tmp", "");
    tempF.delete(); // we only needed the temp file name, so delete the file
    String fileName = Util.removeColon(tempF.getCanonicalPath());

    PigFile f = new PigFile(fileName);
    f.store(b, new FuncSpec(BinStorage.class.getCanonicalName()), pigServer.getPigContext());

    pigServer.registerQuery("a = load '" + Util.encodeEscape(fileName) + "' using BinStorage();");
    pigServer.registerQuery("b = foreach a generate $0#'apple',flatten($1#'orange');");
    Iterator<Tuple> iter = pigServer.openIterator("b");
    t = iter.next();
    // Expected value goes first so that a failure message reports the values the right way round.
    Assert.assertEquals("red", t.get(0).toString());
    Assert.assertEquals(0.3, DataType.toDouble(t.get(1)));
    Assert.assertFalse(iter.hasNext());
}
 
Example #19
Source File: BigIntegerAvg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tuples of the input bag that are non-null, have at least one field, and
 * whose first field is non-null.
 *
 * @param input tuple whose first field is the bag to count
 * @return the number of qualifying tuples
 * @throws ExecException if a field cannot be read
 */
static protected BigInteger count(Tuple input) throws ExecException {
    BigInteger total = BigInteger.ZERO;
    for (Tuple row : (DataBag) input.get(0)) {
        if (row == null || row.size() <= 0 || row.get(0) == null) {
            continue;
        }
        total = total.add(BigInteger.ONE);
    }
    return total;
}
 
Example #20
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Ignores its input and returns a bag holding three tuples, each built from the string
 * {@code "a"}.
 *
 * @param input unused
 * @return bag with three {@code "a"} tuples
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final DataBag repeated = BagFactory.getInstance().newDefaultBag();
    for (int copy = 0; copy < 3; copy++) {
        repeated.add(tupleFactory.newTuple("a"));
    }
    return repeated;
}
 
Example #21
Source File: TestExampleGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a script that unions two filters over the same relation and checks that
 * example generation for the union returns a non-null map.
 */
@Test
public void testFilterUnion() throws Exception {
    PigServer server = new PigServer(pigContext);

    String[] script = {
        "A = load " + A.toString() + " as (x:int, y:int);",
        "B = FILTER A by x  > 3;",
        "C = FILTER A by x < 3;",
        "D = UNION B, C;"
    };
    for (String statement : script) {
        server.registerQuery(statement);
    }

    Map<Operator, DataBag> examples = server.getExamples("D");
    assertNotNull(examples);
}
 
Example #22
Source File: BagToTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the size of the output tuple as the sum of the sizes of all tuples in the
 * input bag.
 *
 * @param bag bag whose tuples are measured; may be {@code null}
 * @return total number of data elements in the bag, or 0 when the bag is {@code null}
 */
private long getOuputTupleSize(DataBag bag) {
    if (bag == null) {
        return 0;
    }
    long total = 0;
    for (Tuple member : bag) {
        total += member.size();
    }
    return total;
}
 
Example #23
Source File: WeightedRangePartitionerTez.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes this partitioner from {@code PigProcessor.sampleMap}.
 *
 * <p>When no sample map is available, a warning is logged and the instance is marked as
 * initialized without further work. Otherwise the quantiles list is passed to
 * {@code convertToArray}, the estimated parallelism is stored, and one
 * {@code DiscreteProbabilitySampleGenerator} per weighted-parts entry is registered in
 * {@code weightedParts}. Any exception while reading the map is rethrown wrapped in a
 * {@code RuntimeException}.
 */
@Override
public void init() {
    final Map<String, Object> quantileMap = PigProcessor.sampleMap;
    if (quantileMap == null) {
        LOG.warn("Quantiles map is empty");
        inited = true;
        return;
    }

    final long startMillis = System.currentTimeMillis();
    try {
        DataBag quantiles = (DataBag) quantileMap.get(FindQuantiles.QUANTILES_LIST);
        InternalMap weighted = (InternalMap) quantileMap.get(FindQuantiles.WEIGHTED_PARTS);
        estimatedNumPartitions = (Integer) quantileMap.get(PigProcessor.ESTIMATED_NUM_PARALLELISM);
        convertToArray(quantiles);
        for (Entry<Object, Object> part : weighted.entrySet()) {
            // The entry key is a sample item which repeats.
            Tuple repeatedKey = (Tuple) part.getKey();
            float[] probabilities = getProbVec((Tuple) part.getValue());
            weightedParts.put(getPigNullableWritable(repeatedKey),
                    new DiscreteProbabilitySampleGenerator(probabilities));
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    comparator = ConfigUtils.getIntermediateInputKeyComparator(job);
    long elapsedMillis = System.currentTimeMillis() - startMillis;
    LOG.info("Initialized WeightedRangePartitionerTez. Time taken: " + elapsedMillis);
    inited = true;
}
 
Example #24
Source File: VAR.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the variance from the combined (sum, sum of squares, count) tuple produced by
 * {@code combine}, as the mean of the squares minus the square of the mean.
 *
 * @param input tuple whose first field is the bag handed to {@code combine}
 * @return the variance, or {@code null} when the sum is {@code null} or the count is not
 *         positive
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        Double sumSquare = (Double) combined.get(1);
        if (sum == null) {
            return null;
        }
        Long count = (Long) combined.get(2);

        Double var = null;

        // Unboxing a null count or sumSquare throws here and is reported through the
        // generic handler below.
        if (count > 0) {
            // Primitive arithmetic replaces the deprecated Double(double) constructor
            // and avoids boxing the intermediate values.
            double avg = sum / count;
            double avgSquare = sumSquare / count;
            var = avgSquare - avg * avg;
        }
        return var;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing variance in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #25
Source File: DoubleAvg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tuples of the input bag that are non-null, have at least one field, and
 * whose first field is non-null.
 *
 * @param input tuple whose first field is the bag to count
 * @return the number of qualifying tuples
 * @throws ExecException if a field cannot be read
 */
static protected long count(Tuple input) throws ExecException {
    long total = 0;
    for (Tuple row : (DataBag) input.get(0)) {
        if (row == null || row.size() <= 0 || row.get(0) == null) {
            continue;
        }
        total++;
    }
    return total;
}
 
Example #26
Source File: MetricUDF.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * This UDF expects a query vector as the first element, a threshold (double) as the second,
 * and a bag of vectors as the third. Vectors are represented by tuples with doubles as
 * elements, or by bags of tuples representing position and value in the case of sparse vectors.
 *
 * <p>
 * It returns the first tuple of the bag whose distance to the query vector is below the
 * threshold, or {@code null} when no tuple qualifies. For an example of its use, please see
 * datafu.pig.hash.lsh.CosineDistanceHash.
 * </p>
 *
 * @see datafu.pig.hash.lsh.CosineDistanceHash
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  Object queryField = input.get(0);
  double threshold = ((Number) input.get(1)).doubleValue();
  DataBag candidates = (DataBag) input.get(2);

  // A tuple is a non-sparse vector and is converted directly; otherwise the first element
  // is a bag (sparse vector) and the enclosing input tuple is handed to the converter.
  Tuple querySource = (queryField instanceof Tuple) ? (Tuple) queryField : input;
  RealVector query = DataTypeUtil.INSTANCE.convert(querySource, dim);

  for (Tuple candidate : candidates) {
    // Same dense/sparse distinction as for the query vector.
    Object vectorField = candidate.get(0);
    Tuple vectorSource = (vectorField instanceof Tuple) ? (Tuple) vectorField : candidate;
    RealVector vector = DataTypeUtil.INSTANCE.convert(vectorSource, query.getDimension());

    if (dist(query, vector) < threshold) {
      return candidate;
    }
  }
  return null;
}
 
Example #27
Source File: AllFirstLetter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates the first character of the string form of each tuple's first field, over
 * all tuples of the input bag, and stores the outcome in {@code result}.
 *
 * <p>Tuples that are {@code null}, have no fields, or have a {@code null} first field are
 * skipped. Values whose string form is empty are skipped as well; taking
 * {@code substring(0, 1)} of them used to throw {@code StringIndexOutOfBoundsException}.
 *
 * @param input tuple whose first field is the bag to scan
 * @return the concatenated first letters; empty when nothing qualifies
 * @throws IOException if a field cannot be read
 */
public String exec(Tuple input) throws IOException {
    result = "";
    DataBag bag = (DataBag) input.get(0);
    // Accumulate in a builder instead of re-allocating the string on every tuple.
    StringBuilder letters = new StringBuilder();
    for (Tuple t : bag) {
        if (t == null || t.size() <= 0) {
            continue;
        }
        Object field = t.get(0);
        if (field == null) {
            continue;
        }
        String text = field.toString();
        if (!text.isEmpty()) {
            letters.append(text.charAt(0));
        }
    }
    result = letters.toString();
    return result;
}
 
Example #28
Source File: FloatAvg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the initial (sum, count) pair from the first tuple of the input bag.
 *
 * @param input tuple whose first field is a bag; only the bag's first tuple is read
 * @return a 2-field tuple: the float value widened to {@code Double} with count
 *         {@code 1L}, or {@code null} with count {@code 0L} when the bag is empty or the
 *         value is {@code null}
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        Tuple t = mTupleFactory.newTuple(2);
        // input is a bag with one tuple containing
        // the column we are trying to avg on
        DataBag bg = (DataBag) input.get(0);
        Float f = null;
        // A single iteration is used so the bag is asked for only one iterator
        // (the previous code created one for hasNext() and another for next()).
        for (Tuple tp : bg) {
            f = (Float) (tp.get(0));
            break; // only the first tuple is needed
        }
        if (f != null) {
            // Double.valueOf replaces the deprecated Double(double) constructor.
            t.set(0, Double.valueOf(f.doubleValue()));
            t.set(1, 1L);
        } else {
            t.set(0, null);
            t.set(1, 0L);
        }
        return t;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #29
Source File: DoubleVAR.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the variance from the combined (sum, sum of squares, count) tuple produced by
 * {@code combine}, as the mean of the squares minus the square of the mean.
 *
 * @param input tuple whose first field is the bag handed to {@code combine}
 * @return the variance, or {@code null} when the sum is {@code null} or the count is not
 *         positive
 * @throws IOException an {@code ExecException} is rethrown as is; any other failure is
 *         wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        Double sumSquare = (Double) combined.get(1);
        if (sum == null) {
            return null;
        }
        Long count = (Long) combined.get(2);

        Double var = null;

        // Unboxing a null count or sumSquare throws here and is reported through the
        // generic handler below.
        if (count > 0) {
            // Primitive arithmetic replaces the deprecated Double(double) constructor
            // and avoids boxing the intermediate values.
            double avg = sum / count;
            double avgSquare = sumSquare / count;
            var = avgSquare - avg * avg;
        }
        return var;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing variance in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #30
Source File: TestMapSideCogroup.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a merge cogroup of a populated relation against an empty one and checks that the
 * three expected groups come back in order, each with an empty bag for the second input.
 */
@Test
public void testEmptyDeltaFile() throws Exception{

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' using "+ DummyCollectableLoader.class.getName() +"() as (c1:chararray,c2:int);");
    pigServer.registerQuery("B = LOAD '" + EMPTY_FILE + "' using "+ DummyIndexableLoader.class.getName()   +"() as (c1:chararray,c2:int);");

    DataBag dbMergeCogrp = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("C = cogroup A by c1, B by c1 using 'merge';");
    Iterator<Tuple> iter = pigServer.openIterator("C");

    while(iter.hasNext()) {
        Tuple t = iter.next();
        dbMergeCogrp.add(t);
    }

    String[] results = new String[]{
            "(1,{(1,1),(1,2),(1,3)},{})",
            "(2,{(2,1),(2,2),(2,3)},{})",
            "(3,{(3,1),(3,2),(3,3)},{})"
    };

    assertEquals(3, dbMergeCogrp.size());
    Iterator<Tuple> itr = dbMergeCogrp.iterator();
    for(int i=0; i<3; i++){
        // Expected value goes first so that a failure message reports the values the right way round.
        assertEquals(results[i], itr.next().toString());
    }
    assertFalse(itr.hasNext());
}