Java Code Examples for org.apache.pig.data.DataBag#add()

The following examples show how to use org.apache.pig.data.DataBag#add(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: TestBuiltin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code COUNT.Initial} once per input value, collects every result in a
 * bag, passes that bag to {@code COUNT.Intermediate}, and expects field 0 of
 * the output to convert to the long value 10.
 */
@Test
public void testCOUNTIntermed() throws Exception {
    Integer[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };

    // Initial stage: one single-tuple bag per value, results gathered for the next stage.
    DataBag partialResults = bagFactory.newDefaultBag();
    for (Integer value : values) {
        DataBag singleton = bagFactory.newDefaultBag();
        singleton.add(tupleFactory.newTuple(value));
        EvalFunc<?> initialStage = new COUNT.Initial();
        Object partial = initialStage.exec(tupleFactory.newTuple(singleton));
        partialResults.add((Tuple) partial);
    }

    // Intermediate stage consumes the bag of Initial outputs.
    EvalFunc<Tuple> intermediateStage = new COUNT.Intermediate();
    Tuple combined = intermediateStage.exec(tupleFactory.newTuple(partialResults));

    Long count = DataType.toLong(combined.get(0));
    assertEquals("Expected count to be 10", 10, count.longValue());
}
 
Example 2
Source File: TestSkewedJoin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads {@code INPUT_FILE5} twice, joins the two relations on {@code id}
 * using the 'skewed' strategy and drains the result iterator into a bag.
 * Any exception raised while doing so is printed and fails the test.
 */
@Test
public void testSkewedJoinNullKeys() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    try {
        DataBag joinedRows = BagFactory.getInstance().newDefaultBag();
        pigServer.registerQuery("C = join A by id, B by id using 'skewed';");
        // The rows are only collected; the test passes as long as iteration does not throw.
        for (Iterator<Tuple> rows = pigServer.openIterator("C"); rows.hasNext();) {
            joinedRows.add(rows.next());
        }
    } catch (Exception failure) {
        System.out.println(failure.getMessage());
        failure.printStackTrace();
        fail("Should support null keys in skewed join");
    }
}
 
Example 3
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Calls {@code BagToString} with a one-field input tuple (the bag only, no
 * delimiter argument) and expects the string {@code "a_5_c_6"} for a bag
 * holding the tuples ("a", 5) and ("c", 6).
 */
@Test
public void testUseDefaultDelimiterBagToStringUDF() throws Exception {
	BagFactory bags = BagFactory.getInstance();
	TupleFactory tuples = TupleFactory.getInstance();

	Tuple first = tuples.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	Tuple second = tuples.newTuple(2);
	second.set(0, "c");
	second.set(1, 6);

	DataBag inputBag = bags.newDefaultBag();
	inputBag.add(first);
	inputBag.add(second);

	// Only the bag is supplied, so the UDF has to pick its own delimiter.
	Tuple udfArgs = tuples.newTuple(1);
	udfArgs.set(0, inputBag);

	BagToString bagToString = new BagToString();
	String joined = bagToString.exec(udfArgs);

	assertEquals("a_5_c_6", joined);
}
 
Example 4
Source File: Distinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Copies every tuple found in the nested bags of {@code input} into a bag
 * obtained from {@code createDataBag()}.
 *
 * <p>Field 0 of {@code input} is read as a bag; each tuple in it is expected
 * to carry another bag in its own field 0, whose tuples are added to the
 * result. {@code evalFunc.progress()} is invoked after every 1000 added tuples.
 *
 * @param input    tuple whose field 0 is the outer bag (may hold {@code null})
 * @param evalFunc function whose {@code progress()} is called periodically
 * @return the result bag; empty when the outer bag is {@code null}
 * @throws IOException if reading a field fails
 */
static private DataBag getDistinctFromNestedBags(Tuple input, EvalFunc evalFunc) throws IOException {
    DataBag collected = createDataBag();
    DataBag outerBag = (DataBag) input.get(0);
    if (outerBag == null) {
        return collected;
    }

    long addedSoFar = 0;
    for (Tuple wrapper : outerBag) {
        // Each outer tuple has a single column holding the inner bag to unpack.
        DataBag innerBag = (DataBag) wrapper.get(0);
        for (Tuple element : innerBag) {
            collected.add(element);
            addedSoFar++;
            if (addedSoFar % 1000 == 0) {
                evalFunc.progress();
            }
        }
    }
    return collected;
}
 
Example 5
Source File: VARTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Pushes the values 1.0 through 1000.0 one at a time through
 * {@code DoubleVAR.Initial} and {@code DoubleVAR.Intermediate}, hands all
 * intermediate tuples to {@code DoubleVAR.Final}, and expects a result within
 * 0.0001 of 83333.25.
 */
@Test
public void varDoubleAlgebraicFinalTest() throws Exception {
  DoubleVAR.Initial initialStage = new DoubleVAR.Initial();
  DoubleVAR.Intermediate intermediateStage = new DoubleVAR.Intermediate();
  DoubleVAR.Final finalStage = new DoubleVAR.Final();

  DataBag finalInput = BagFactory.getInstance().newDefaultBag();

  for (int value = 1; value <= 1000; value++) {
    // Wrap the single value in a one-tuple bag for the Initial stage.
    Tuple valueTuple = TupleFactory.getInstance().newTuple(1);
    valueTuple.set(0, (double) value);
    DataBag valueBag = BagFactory.getInstance().newDefaultBag();
    valueBag.add(valueTuple);
    Tuple partial = initialStage.exec(TupleFactory.getInstance().newTuple(valueBag));

    // Run the Initial output through Intermediate on its own before collecting it.
    DataBag partialBag = BagFactory.getInstance().newDefaultBag();
    partialBag.add(partial);
    Tuple merged = intermediateStage.exec(TupleFactory.getInstance().newTuple(partialBag));
    finalInput.add(merged);
  }

  Double variance = finalStage.exec(TupleFactory.getInstance().newTuple(finalInput));

  boolean closeEnough = Math.abs(83333.25 - variance) < 0.0001;
  Assert.assertTrue("Expected about 83333.25 but found " + variance, closeEnough);
}
 
Example 6
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-tuple bag fixture:
 * ("a", 5), ("c", 6, ("d", 7)) and a one-field tuple holding a bag that
 * contains ("in bag", 10).
 *
 * @return the populated bag
 * @throws ExecException if setting a tuple field fails
 */
private DataBag buildBagWithNestedTupleAndBag() throws ExecException {
	// Plain two-field tuple.
	Tuple flat = tf.newTuple(2);
	flat.set(0, "a");
	flat.set(1, 5);

	// Tuple whose third field is itself a tuple.
	Tuple inner = tf.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);
	Tuple withTuple = tf.newTuple(3);
	withTuple.set(0, "c");
	withTuple.set(1, 6);
	withTuple.set(2, inner);

	// Tuple whose only field is a bag.
	DataBag innerBag = bf.newDefaultBag();
	Tuple bagMember = tf.newTuple(2);
	bagMember.set(0, "in bag");
	bagMember.set(1, 10);
	innerBag.add(bagMember);
	Tuple withBag = tf.newTuple(1);
	withBag.set(0, innerBag);

	DataBag fixture = bf.newDefaultBag();
	fixture.add(flat);
	fixture.add(withTuple);
	fixture.add(withBag);
	return fixture;
}
 
Example 7
Source File: TestMapSideCogroup.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a 'merge' cogroup of two relations loaded from
 * {@code DATA_WITH_NULL_KEYS} and checks that the result consists of exactly
 * the expected rows, in the expected order.
 */
@Test
public void testDataWithNullKeys() throws Exception{

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + DATA_WITH_NULL_KEYS + "' using "+ DummyCollectableLoader.class.getName() +"() as (c1:chararray,c2:int);");
    pigServer.registerQuery("B = LOAD '" + DATA_WITH_NULL_KEYS + "' using "+ DummyIndexableLoader.class.getName()   +"() as (c1:chararray,c2:int);");

    // Expected string form of each cogroup row, in output order.
    String[] results = new String[]{
            "(,{(,1),(,2),(,3)},{})",
            "(,{},{(,1),(,2),(,3)})",
            "(2,{(2,3),(2,1),(2,2)},{(2,1),(2,2),(2,3)})",
            "(3,{(3,3),(3,1),(3,2)},{(3,1),(3,2),(3,3)})"
    };

    DataBag dbMergeCogrp = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("C = cogroup A by c1, B by c1 using 'merge';");
    Iterator<Tuple> iter = pigServer.openIterator("C");

    while(iter.hasNext()) {
        Tuple t = iter.next();
        dbMergeCogrp.add(t);
    }

    // Tie the expected row count to the fixture instead of repeating the literal 4.
    assertEquals(results.length, dbMergeCogrp.size());
    Iterator<Tuple> itr = dbMergeCogrp.iterator();
    for(int i=0; i<results.length; i++){
        // Expected value goes first so a failure message labels the two sides correctly.
        assertEquals(results[i], itr.next().toString());
    }
    assertFalse(itr.hasNext());

}
 
Example 8
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fixed map containing one entry per value kind: string, int, long,
 * float, double, byte array, nested map, tuple and bag. The {@code input}
 * tuple is not read.
 *
 * @param input ignored
 * @return a new map with the keys "string", "int", "long", "float", "double",
 *         "dba", "map", "tuple" and "bag"
 * @throws IOException declared by the overridden method
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    TupleFactory tupleFactory = TupleFactory.getInstance();
    // valueOf factories and plain literals replace the deprecated boxing
    // constructors and the redundant new String(...) copies; the values are equal.
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    // Still encoded with the platform default charset, exactly as before.
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example 9
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a one-tuple bag holding two maps (colors and weights) with
 * {@code BinStorage}, loads it back, and checks that the map lookups
 * {@code $0#'apple'} and {@code $1#'orange'} yield "red" and 0.3.
 */
@Test
public void testMapLookup() throws Exception {
    DataBag b = BagFactory.getInstance().newDefaultBag();
    Map<String, Object> colors = new HashMap<String, Object>();
    colors.put("apple","red");
    colors.put("orange","orange");

    Map<String, Object> weights = new HashMap<String, Object>();
    weights.put("apple","0.1");
    weights.put("orange","0.3");

    Tuple t = mTf.newTuple();
    t.append(colors);
    t.append(weights);
    b.add(t);

    File tmpFile = File.createTempFile("tmp", "");
    tmpFile.delete(); // we only needed the temp file name, so delete the file
    String fileName = Util.removeColon(tmpFile.getAbsolutePath());

    PigFile f = new PigFile(fileName);
    f.store(b, new FuncSpec(BinStorage.class.getCanonicalName()),
            pigServer.getPigContext());

    pigServer.registerQuery("a = load '" + Util.encodeEscape(fileName) + "' using BinStorage();");
    pigServer.registerQuery("b = foreach a generate $0#'apple',flatten($1#'orange');");
    Iterator<Tuple> iter = pigServer.openIterator("b");
    t = iter.next();
    // Expected value goes first so a failure message labels the two sides correctly.
    Assert.assertEquals("red", t.get(0).toString());
    Assert.assertEquals(0.3d, DataType.toDouble(t.get(1)).doubleValue(), 0.0d);
    Assert.assertFalse(iter.hasNext());
    Util.deleteFile(cluster, fileName);
}
 
Example 10
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code BagToString} with a bag and the delimiter "_" and expects
 * {@code "a_5_c_6_(d,7)"} for the tuples ("a", 5) and ("c", 6, ("d", 7)).
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
	BagFactory bags = BagFactory.getInstance();
	TupleFactory tuples = TupleFactory.getInstance();

	Tuple flat = tuples.newTuple(2);
	flat.set(0, "a");
	flat.set(1, 5);

	// The second tuple carries another tuple in its last field.
	Tuple inner = tuples.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);
	Tuple withInner = tuples.newTuple(3);
	withInner.set(0, "c");
	withInner.set(1, 6);
	withInner.set(2, inner);

	DataBag source = bags.newDefaultBag();
	source.add(flat);
	source.add(withInner);

	// Arguments: field 0 is the bag, field 1 the delimiter.
	Tuple udfArgs = tuples.newTuple(2);
	udfArgs.set(0, source);
	udfArgs.set(1, "_");

	BagToString bagToString = new BagToString();
	String joined = bagToString.exec(udfArgs);

	assertEquals("a_5_c_6_(d,7)", joined);
}
 
Example 11
Source File: PigPerformanceLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a byte sequence of ^B-separated (byte value 2) elements into a bag
 * of one-field tuples.
 *
 * <p>The first byte of each element is a type code selecting the converter
 * applied to the remaining bytes of that element:
 * 'i' integer, 'l' long, 'f' float, 'd' double, 's' chararray, 'm' map,
 * 'b' nested bag.
 *
 * @param b  encoded bytes; {@code null} yields {@code null}
 * @param fs not read by this method
 * @return a bag with one tuple per element, or {@code null} if {@code b} is {@code null}
 * @throws IOException      if setting the tuple field raises an {@code ExecException}
 * @throws RuntimeException if an element starts with an unknown type code
 */
public DataBag bytesToBag(byte[] b, ResourceFieldSchema fs) throws IOException {
    if (b == null) return null;

    DataBag bag = bagFactory.newDefaultBag();

    int pos = 0;
    while (pos < b.length) {
        Tuple t = tupleFactory.newTuple(1);

        // Figure out how long until the next element in the list.
        int start = pos;
        while (pos < b.length && b[pos] != 2) pos++; // 2 is ^B

        // The payload is everything after the type byte, i.e. pos - start - 1
        // bytes. Sizing the buffer as pos - start left a stray trailing 0 byte
        // that was handed to the converters. An element that is empty (the
        // separator sits at 'start') gets an empty payload and is rejected by
        // the default branch below, as before.
        int payloadLength = Math.max(0, pos - start - 1);
        byte[] copy = new byte[payloadLength];
        System.arraycopy(b, start + 1, copy, 0, payloadLength);

        // The first byte will tell us what type the field is.
        try {
            switch (b[start]) {
                case 'i': t.set(0, bytesToInteger(copy)); break;   // 105
                case 'l': t.set(0, bytesToLong(copy)); break;      // 108
                case 'f': t.set(0, bytesToFloat(copy)); break;     // 102
                case 'd': t.set(0, bytesToDouble(copy)); break;    // 100
                case 's': t.set(0, bytesToCharArray(copy)); break; // 115
                case 'm': t.set(0, bytesToMap(copy)); break;       // 109
                case 'b': t.set(0, bytesToBag(copy, null)); break; // 98
                default: throw new RuntimeException("unknown type " + b[start]);
            }
        } catch (ExecException ee) {
            throw new IOException(ee);
        }
        pos++; // move past the separator
        bag.add(t);
    }

    return bag;
}
 
Example 12
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a bag that contains a single one-field tuple holding the input
 * value when that value is greater than 10, and an empty bag otherwise.
 *
 * @param input tuple whose field 0 is cast to {@code Integer}
 * @return a new bag, empty unless the value exceeds 10
 * @throws IOException if reading field 0 fails
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    Integer value = (Integer) input.get(0);
    DataBag result = BagFactory.getInstance().newDefaultBag();

    if (value <= 10) {
        return result;
    }

    Tuple wrapped = TupleFactory.getInstance().newTuple();
    wrapped.append(value);
    result.add(wrapped);
    return result;
}
 
Example 13
Source File: SUBTRACT.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bag from the tuples of {@code bag1} that remain after removing
 * every tuple of {@code bag2}.
 *
 * <p>{@code bag1} is first converted with {@code toSet}, so this assumes its
 * contents fit in memory.
 *
 * @param bag1 bag to subtract from
 * @param bag2 bag whose tuples are removed
 * @return a new default bag holding the remaining tuples
 */
private static DataBag subtract(DataBag bag1, DataBag bag2) {
    DataBag difference = BagFactory.getInstance().newDefaultBag();

    // Start from the set form of bag1 and strike out whatever bag2 contains.
    Set<Tuple> remaining = toSet(bag1);
    for (Tuple excluded : bag2) {
        remaining.remove(excluded);
    }

    // Whatever is left belongs to bag1 only; copy it into the result bag.
    for (Iterator<Tuple> survivors = remaining.iterator(); survivors.hasNext();) {
        difference.add(survivors.next());
    }
    return difference;
}
 
Example 14
Source File: TestUnion.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the fixture for the union tests: a local {@code PigServer}, a
 * physical plan with two load/foreach/filter branches feeding a union
 * operator ({@code sp}), and the expected bag ({@code expBag}) obtained by
 * loading the same "passwd" input and projecting fields 0 and 2.
 */
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL, new Properties());
    pc = pigServer.getPigContext();
    pc.connect();
    GenPhyOp.setPc(pc);
    POLoad ld1 = GenPhyOp.topLoadOp();
    String curDir = System.getProperty("user.dir");
    String inpDir = curDir + File.separatorChar + "test/org/apache/pig/test/data/InputFiles/";
    FileSpec fSpec = new FileSpec(Util.generateURI(inpDir + "passwd", pc), new FuncSpec(PigStorage.class.getName() , new String[]{":"}));
    ld1.setLFile(fSpec);

    POLoad ld2 = GenPhyOp.topLoadOp();
    ld2.setLFile(fSpec);

    POFilter fl1 = GenPhyOp.topFilterOpWithProj(1, 50, GenPhyOp.LTE);

    POFilter fl2 = GenPhyOp.topFilterOpWithProj(1, 50, GenPhyOp.GT);

    int[] flds = {0,2};
    // Sample row handed to the foreach builders. Literals and Integer.valueOf
    // replace the deprecated new String(...) / new Integer(String) constructors.
    Tuple sample = new DefaultTuple();
    sample.append("S");
    sample.append("x");
    sample.append(Integer.valueOf(10));
    sample.append(Integer.valueOf(20));
    sample.append("S");
    sample.append("x");
    sample.append("S");
    sample.append("x");

    POForEach fe1 = GenPhyOp.topForEachOPWithPlan(flds , sample);

    POForEach fe2 = GenPhyOp.topForEachOPWithPlan(flds , sample);

    sp = GenPhyOp.topUnionOp();

    PhysicalPlan plan = new PhysicalPlan();

    plan.add(ld1);
    plan.add(ld2);
    plan.add(fl1);
    plan.add(fl2);
    plan.add(fe1);
    plan.add(fe2);
    plan.add(sp);

    // Two parallel branches (load -> foreach -> filter) joined by the union.
    plan.connect(ld1, fe1);
    plan.connect(fe1, fl1);
    plan.connect(ld2, fe2);
    plan.connect(fe2, fl2);
    plan.connect(fl1, sp);
    plan.connect(fl2, sp);

    /*PlanPrinter ppp = new PlanPrinter(plan);
    ppp.visit();*/


    // Read the whole input through a third loader to build the expected bag.
    POLoad ld3 = GenPhyOp.topLoadOp();
    ld3.setLFile(fSpec);
    DataBag fullBag = DefaultBagFactory.getInstance().newDefaultBag();
    for(Result res=ld3.getNextTuple();res.returnStatus!=POStatus.STATUS_EOP;res=ld3.getNextTuple()){
        fullBag.add((Tuple)res.result);
    }

    int[] fields = {0,2};
    expBag = TestHelper.projectBag(fullBag, fields);
}
 
Example 15
Source File: PigParquetReader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code element} in a new tuple created by {@code tupleFactory} and
 * adds that tuple to {@code bag}.
 *
 * @param bag     bag that receives the new tuple
 * @param element value passed to {@code tupleFactory.newTuple}
 */
@Override
protected void addElement(DataBag bag, T element) {
  bag.add(tupleFactory.newTuple(element));
}
 
Example 16
Source File: TestBuiltin.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * For every AVG "Final" function, simulates the stage before it by running
 * the matching "Intermediate" function on two halves of the input and then
 * checks the Final result against {@code getExpected}.
 */
@Test
public void testAVGFinal() throws Exception {
    String[] avgTypes = {"AVGFinal", "DoubleAvgFinal", "LongAvgFinal", "IntAvgFinal", "FloatAvgFinal", "BigDecimalAvgFinal", "BigIntegerAvgFinal"};
    String[] avgIntermediateTypes = {"AVGIntermediate", "DoubleAvgIntermediate", "LongAvgIntermediate", "IntAvgIntermediate", "FloatAvgIntermediate",
                                     "BigDecimalAvgIntermediate", "BigIntegerAvgIntermediate"};
    for (int k = 0; k < avgTypes.length; k++) {
        EvalFunc<?> avg = evalFuncMap.get(avgTypes[k]);
        Tuple tup = inputMap.get(getInputType(avgTypes[k]));

        // Compare string contents with equals(); '==' only tests reference
        // identity and works for strings purely by accident of interning.
        boolean bigDecimalInput = "BigDecimal".equals(getInputType(avgTypes[k]));
        boolean bigIntegerInput = "BigInteger".equals(getInputType(avgTypes[k]));

        // To test AVGFinal, AVGIntermediate should first be called and
        // the output of AVGIntermediate should be supplied as input to
        // AVGFinal. To simulate this, we will call Intermediate twice
        // on the above tuple and collect the outputs and pass it to
        // Final.

        // get the right "Intermediate" EvalFunc
        EvalFunc<?> avgIntermediate = evalFuncMap.get(avgIntermediateTypes[k]);
        // The tuple we got above has a bag with input
        // values. Input to the Intermediate.exec() however comes
        // from the map which would put each value and a count of
        // 1 in a tuple and send it down. So lets create a bag with
        // tuples that have two fields - the value and a count 1.
        // The input has 10 values - lets put the first five of them
        // in the input to the first call of AVGIntermediate and the
        // remaining five in the second call.
        DataBag bg = (DataBag) tup.get(0);
        DataBag  bg1 = bagFactory.newDefaultBag();
        DataBag  bg2 = bagFactory.newDefaultBag();
        int i = 0;
        for (Tuple t: bg) {
            Tuple newTuple = tupleFactory.newTuple(2);
            newTuple.set(0, t.get(0));

            // A null value contributes a count of zero, any other value a
            // count of one, expressed in the numeric type of the input.
            boolean nullValue = t.get(0) == null;
            Object count;
            if (bigDecimalInput) {
                count = nullValue ? BigDecimal.ZERO : BigDecimal.ONE;
            } else if (bigIntegerInput) {
                count = nullValue ? BigInteger.ZERO : BigInteger.ONE;
            } else {
                count = Long.valueOf(nullValue ? 0L : 1L);
            }
            newTuple.set(1, count);

            if (i < 5) {
                bg1.add(newTuple);
            } else {
                bg2.add(newTuple);
            }
            i++;
        }
        Tuple intermediateInput1 = tupleFactory.newTuple();
        intermediateInput1.append(bg1);
        Object output1 = avgIntermediate.exec(intermediateInput1);
        Tuple intermediateInput2 = tupleFactory.newTuple();
        intermediateInput2.append(bg2);
        Object output2 = avgIntermediate.exec(intermediateInput2);

        DataBag bag = Util.createBag(new Tuple[]{(Tuple)output1, (Tuple)output2});

        Tuple finalTuple = TupleFactory.getInstance().newTuple(1);
        finalTuple.set(0, bag);
        Object output = avg.exec(finalTuple);
        String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
        output + " == " + getExpected(avgTypes[k]) + " (expected) )]";
        if (bigDecimalInput || bigIntegerInput) {
            assertEquals(msg, ((BigDecimal)getExpected(avgTypes[k])).toPlainString(), ((BigDecimal)output).toPlainString());
        } else {
            assertEquals(msg, (Double)getExpected(avgTypes[k]), (Double)output, 0.00001);
        }
    }
}
 
Example 17
Source File: Stitch.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Combines the bags passed as arguments into one bag by concatenating, row
 * by row, the fields of the tuples found at the same position in each bag.
 *
 * <p>The number of output tuples is driven by the first bag; a later bag
 * that runs out of tuples simply contributes no fields to the remaining rows.
 *
 * @param input tuple whose fields are the bags to combine
 * @return {@code null} for a null or empty input, the bag itself when only
 *         one is given, otherwise a new bag of combined tuples
 * @throws IOException (as {@code ExecException}) when an argument is not a bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {

    if (input == null || input.size() == 0) return null;

    List<DataBag> sources = new ArrayList<DataBag>(input.size());

    for (int i = 0; i < input.size(); i++) {
        Object argument = input.get(i);
        // A null argument passes through, exactly as a cast of null would.
        if (argument != null && !(argument instanceof DataBag)) {
            int errCode = 2107; // TODO not sure this is the right one
            String msg = "Stitch expected bags as input but argument " +
                i + " is a " + DataType.findTypeName(argument);
            throw new ExecException(msg, errCode, PigException.INPUT);
        }
        sources.add((DataBag) argument);
    }

    if (sources.size() == 1) return sources.get(0);

    DataBag stitched = BagFactory.getInstance().newDefaultBag();
    List<Iterator<Tuple>> cursors = new ArrayList<Iterator<Tuple>>(sources.size());
    for (DataBag source : sources) {
        cursors.add(source.iterator());
    }

    // The first bag decides how many rows are produced.
    Iterator<Tuple> lead = cursors.get(0);
    while (lead.hasNext()) {
        Tuple row = TupleFactory.getInstance().newTuple();
        for (Iterator<Tuple> cursor : cursors) {
            if (!cursor.hasNext()) {
                continue;
            }
            for (Object field : cursor.next().getAll()) {
                row.append(field);
            }
        }
        stitched.add(row);
    }
    return stitched;
}
 
Example 18
Source File: AugmentBaseDataVisitor.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Propagates the output constraints recorded for {@code forEach} back to its
 * first predecessor in the inner plan.
 *
 * <p>Only a simple projection (optionally wrapped in a cast) as the first
 * sink of the inner plan is handled; anything else leaves the predecessor
 * without constraints. The entry for {@code forEach} is removed from
 * {@code outputConstraintsMap} in every case that gets past the limit check.
 *
 * @param forEach the operator being visited
 * @throws FrontendException if back-propagating a constraint fails
 */
@Override
public void visit(LOForEach forEach) throws FrontendException {
    if (limit && !((PreOrderDepthFirstWalker) currentWalker).getBranchFlag())
        return;
    DataBag outputConstraints = outputConstraintsMap.get(forEach);
    outputConstraintsMap.remove(forEach);
    LogicalPlan plan = forEach.getInnerPlan();

    // we dont have to do anything when there are no constraints
    if (outputConstraints == null || outputConstraints.size() == 0) {
        return;
    }

    // Unwrap a cast, remembering that one was present.
    boolean cast = false;
    Operator op = plan.getSinks().get(0);
    if (op instanceof CastExpression) {
        cast = true;
        op = ((CastExpression) op).getExpression();
    }

    // we can only handle simple projections
    if (!(op instanceof ProjectExpression)) {
        return;
    }
    List<Integer> cols = new ArrayList<Integer>();
    cols.add(Integer.valueOf(((ProjectExpression) op).getColNum()));

    DataBag output = BagFactory.getInstance().newDefaultBag();
    for (Tuple outputConstraint : outputConstraints) {
        try {
            Tuple inputConstraint = BackPropConstraint(
                    outputConstraint, cols,
                    ((LogicalRelationalOperator) plan.getPredecessors(forEach).get(0)).getSchema(),
                    cast);
            output.add(inputConstraint);
        } catch (Exception e) {
            e.printStackTrace();
            throw new FrontendException(
                    "Operator error during Augmenting Phase in Example Generator "
                            + e.getMessage());
        }
    }
    outputConstraintsMap.put(plan.getPredecessors(forEach).get(0), output);

}
 
Example 19
Source File: TestPOGenerate.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the three fixture bags used by the tests:
 * {@code cogroup} (four tuples pairing a bag of A rows with a bag of B rows),
 * {@code partialFlatten} (each A row with a B bag appended as a third field)
 * and {@code simpleGenerate} (the four A rows).
 */
@Before
public void setUp() throws Exception {
    // Source rows: A has two char fields (key, '1'), B has a single char key.
    char[] keysA = {'a', 'b', 'a', 'c'};
    char[] keysB = {'b', 'b', 'a', 'd'};
    Tuple[] inputA = new Tuple[4];
    Tuple[] inputB = new Tuple[4];
    for (int i = 0; i < 4; i++) {
        inputA[i] = tf.newTuple(2);
        inputA[i].set(0, keysA[i]);
        inputA[i].set(1, '1');
        inputB[i] = tf.newTuple(1);
        inputB[i].set(0, keysB[i]);
    }

    // Bags of A rows: rows 0 and 2, row 1, row 3.
    DataBag cg11 = bf.newDefaultBag();
    cg11.add(inputA[0]);
    cg11.add(inputA[2]);
    DataBag cg21 = bf.newDefaultBag();
    cg21.add(inputA[1]);
    DataBag cg31 = bf.newDefaultBag();
    cg31.add(inputA[3]);
    DataBag emptyBag = bf.newDefaultBag();

    // Bags of B rows: row 2, rows 0 and 1, row 3.
    DataBag cg12 = bf.newDefaultBag();
    cg12.add(inputB[2]);
    DataBag cg22 = bf.newDefaultBag();
    cg22.add(inputB[0]);
    cg22.add(inputB[1]);
    DataBag cg42 = bf.newDefaultBag();
    cg42.add(inputB[3]);

    // Cogroup rows: (A bag, B bag), using the empty bag where one side has no rows.
    DataBag[][] cogroupPairs = {
        {cg11, cg12},
        {cg21, cg22},
        {cg31, emptyBag},
        {emptyBag, cg42}
    };
    Tuple[] cogroupRows = new Tuple[4];
    for (int i = 0; i < 4; ++i) {
        cogroupRows[i] = tf.newTuple(2);
        cogroupRows[i].set(0, cogroupPairs[i][0]);
        cogroupRows[i].set(1, cogroupPairs[i][1]);
    }
    cogroup = bf.newDefaultBag();
    for (Tuple row : cogroupRows) {
        cogroup.add(row);
    }

    // Partial-flatten rows: the two A fields followed by one appended B bag.
    DataBag[] appendedBags = {cg12, cg22, cg12, emptyBag};
    Tuple[] partialRows = new Tuple[4];
    for (int i = 0; i < 4; ++i) {
        partialRows[i] = tf.newTuple(2);
        partialRows[i].set(0, inputA[i].get(0));
        partialRows[i].set(1, inputA[i].get(1));
        partialRows[i].append(appendedBags[i]);
    }
    partialFlatten = bf.newDefaultBag();
    for (Tuple row : partialRows) {
        partialFlatten.add(row);
    }

    simpleGenerate = bf.newDefaultBag();
    for (Tuple row : inputA) {
        simpleGenerate.add(row);
    }

    //System.out.println("Cogroup : " + cogroup);
    //System.out.println("Partial : " + partialFlatten);
    //System.out.println("Simple : " + simpleGenerate);

}
 
Example 20
Source File: PigParquetReader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code element} in a new tuple created by {@code TF} and adds that
 * tuple to {@code bag}.
 *
 * @param bag     bag that receives the new tuple
 * @param element value passed to {@code TF.newTuple}
 */
@Override
protected void addElement(DataBag bag, T element) {
  bag.add(TF.newTuple(element));
}