org.apache.pig.data.BagFactory Java Examples

The following examples show how to use org.apache.pig.data.BagFactory. You can vote each example up or down, and follow the links above an example to reach its original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: TransposeTupleToBag.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Transposes the input tuple into a bag holding one two-field tuple per input field:
 * the alias registered for that position followed by the field's value.
 *
 * @param input the tuple to transpose
 * @return a bag with one (alias, value) tuple for each position of {@code input}; the alias
 *         slot holds whatever the position lookup yields, which is null when no alias maps
 *         to that position
 * @throws IOException propagated from reading the fields of {@code input}
 */
@Override
public DataBag exec(Tuple input) throws IOException
{
  // Build the reverse (position -> alias) mapping from a single getFieldAliases() call,
  // walking its entries instead of re-fetching the map and re-looking-up every key.
  HashMap<Integer, String> positionToAlias = new HashMap<Integer, String>();
  for (java.util.Map.Entry<String, Integer> aliasEntry : getFieldAliases().entrySet()) {
    positionToAlias.put(aliasEntry.getValue(), aliasEntry.getKey());
  }
  DataBag output = BagFactory.getInstance().newDefaultBag();
  for (int i=0; i<input.size(); i++) {
    Tuple tuple = TupleFactory.getInstance().newTuple();
    tuple.append(positionToAlias.get(i));
    tuple.append(input.get(i));
    output.add(tuple);
  }
  return output;
}
 
Example #2
Source File: TOBAG2.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Collects every field of the input tuple into one bag. A field that is already a tuple is
 * added directly; any other value is first wrapped in a one-field tuple.
 *
 * @param input the tuple whose fields are gathered
 * @return the bag containing one tuple per input field
 * @throws RuntimeException wrapping any exception raised while building the bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        final DataBag result = BagFactory.getInstance().newDefaultBag();

        int index = 0;
        while (index < input.size()) {
            final Object field = input.get(index);
            if (!(field instanceof Tuple)) {
                Tuple wrapper = TupleFactory.getInstance().newTuple(1);
                wrapper.set(0, field);
                result.add(wrapper);
            } else {
                result.add((Tuple) field);
            }
            index++;
        }

        return result;
    } catch (Exception ee) {
        throw new RuntimeException("Error while creating a bag", ee);
    }
}
 
Example #3
Source File: POCross.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Drains every input operator except the last one into its own bag, recording for each
 * drained input both the bag (in {@code inputBags}) and an iterator over it (in {@code its}).
 *
 * @throws ExecException if an input reports {@code STATUS_ERR}, or as propagated from an input
 */
@SuppressWarnings("unchecked")
private void accumulateData() throws ExecException {
    final int bagCount = inputs.size() - 1;
    inputBags = new DataBag[bagCount];
    its = new Iterator[bagCount];
    for (int slot = 0; slot < bagCount; slot++) {
        PhysicalOperator source = inputs.get(slot);
        DataBag collected = BagFactory.getInstance().newDefaultBag();
        inputBags[slot] = collected;

        Result res = source.getNextTuple();
        while (res.returnStatus != POStatus.STATUS_EOP) {
            // STATUS_NULL results are skipped without any further inspection.
            if (res.returnStatus != POStatus.STATUS_NULL) {
                if (res.returnStatus == POStatus.STATUS_ERR) {
                    throw new ExecException(
                            "Error accumulating data in the local Cross operator");
                }
                if (res.returnStatus == POStatus.STATUS_OK) {
                    collected.add((Tuple) res.result);
                }
            }
            res = source.getNextTuple();
        }
        its[slot] = collected.iterator();
    }
}
 
Example #4
Source File: WeightedReservoirSamplingTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds 100 single-tuple bags, each holding (i, i + 1), to the sampler through
 * {@code accumulate} and hands the sampled bag to {@code verifyNoRepeatAllFound}.
 */
@Test
public void weightedReservoirSampleAccumulateTest() throws IOException
{
   WeightedReservoirSample sampler = new WeightedReservoirSample("10", "1");

   int i = 0;
   while (i < 100)
   {
     Tuple pair = TupleFactory.getInstance().newTuple(2);
     pair.set(0, i);
     pair.set(1, i + 1);

     DataBag singleton = BagFactory.getInstance().newDefaultBag();
     singleton.add(pair);

     sampler.accumulate(TupleFactory.getInstance().newTuple(singleton));
     i++;
   }

   verifyNoRepeatAllFound(sampler.getValue(), 10, 0, 100);
}
 
Example #5
Source File: ReverseEnumerate.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Copies every tuple of the input bag and appends a descending index to the copy. The first
 * tuple iterated receives {@code inputBag.size() - 1 + start}; each following tuple receives
 * one less.
 *
 * @param inputBag the bag whose tuples are enumerated in reverse
 * @return a new bag with one extended copy per input tuple
 * @throws IOException propagated from the tuple and bag operations
 */
public DataBag call(DataBag inputBag) throws IOException
{
  DataBag outputBag = BagFactory.getInstance().newDefaultBag();
  long i = inputBag.size() - 1 + start;
  long count = 0;

  for (Tuple t : inputBag) {
    Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll());
    t1.append(i);
    outputBag.add(t1);

    // Count the tuple before testing the threshold: testing first made the check succeed at
    // count == 0 and forced a spill right after the very first tuple was added.
    count++;
    if (count % 1000000 == 0) {
      outputBag.spill();
      count = 0;
    }
    i--;
  }

  return outputBag;
}
 
Example #6
Source File: TestMapReduce.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * For each tuple of the bag in field 0 of the input, emits a two-field tuple made of
 * {@code field0} and the string form of that tuple's first field.
 *
 * @param input tuple whose first field is converted to a bag
 * @return bag of the generated two-field tuples
 * @throws IOException wrapping any {@code ExecException} raised along the way
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag result = BagFactory.getInstance().newDefaultBag();
        for (Tuple row : DataType.toBag(input.get(0))) {
            Tuple pair = TupleFactory.getInstance().newTuple(2);
            pair.set(0, field0);
            pair.set(1, row.get(0).toString());
            result.add(pair);
        }

        return result;
    } catch (ExecException ee) {
        throw new IOException(ee.getMessage(), ee);
    }
}
 
Example #7
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a bag with one single-field tuple per n-gram that {@code makeNGrams} produces for
 * the string form of the first input field.
 *
 * @param input tuple whose first field supplies the title text
 * @return bag of n-gram tuples; empty when the first field (or its string form) is null
 * @throws IOException wrapping any {@code ExecException} raised along the way
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag output = BagFactory.getInstance().newDefaultBag();

        // The null test has to happen on the field itself, before toString() is invoked;
        // testing the resulting string cannot prevent a NullPointerException.
        Object field = input.get(0);
        if (field == null) {
            return output;
        }

        String title = field.toString();
        if (title != null) {
            List<String> nGrams = makeNGrams(title);

            for (Iterator<String> it = nGrams.iterator(); it.hasNext(); ) {
                Tuple t = TupleFactory.getInstance().newTuple(1);
                t.set(0, it.next());
                output.add(t);
            }
        }

        return output;
    } catch (ExecException ee) {
        IOException ioe = new IOException(ee.getMessage());
        ioe.initCause(ee);
        throw ioe;
    }
}
 
Example #8
Source File: TestLogicalPlanBuilder.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a plan that generates an empty bag constant and checks that the generate expression
 * is a bag-typed constant equal to a fresh default bag, with schema string ":bag{}".
 */
@Test
public void testEmptyBagConst() throws Exception{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate {};store a into 'output';");

    // Walk back from the store sink to the foreach and into its inner generate.
    Operator storeOp = plan.getSinks().get(0);
    LOForEach forEachOp = (LOForEach) plan.getPredecessors(storeOp).get(0);
    LOGenerate generateOp = (LOGenerate) forEachOp.getInnerPlan().getSinks().get(0);

    LogicalExpressionPlan expression = generateOp.getOutputPlans().get(0);
    Operator source = expression.getSources().get(0);
    Assert.assertTrue(source instanceof ConstantExpression);

    ConstantExpression constant = (ConstantExpression) source;
    Assert.assertTrue(constant.getType() == DataType.BAG);
    Assert.assertTrue(constant.getValue() instanceof DataBag);
    Assert.assertTrue(constant.getValue().equals(BagFactory.getInstance().newDefaultBag()));

    String schemaText = forEachOp.getSchema().toString(false);
    Assert.assertTrue(schemaText.equals(":bag{}"));
}
 
Example #9
Source File: TestPODistinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code input} with MAX_SAMPLES pairs of single-field tuples, a random int followed by
 * a null, and then runs {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithIntAndNullValues() throws ExecException {
    input = BagFactory.getInstance().newDefaultBag();
    final TupleFactory tupleFactory = TupleFactory.getInstance();

    for (int sample = 0; sample < MAX_SAMPLES; sample++) {
        Tuple valueTuple = tupleFactory.newTuple();
        valueTuple.append(r.nextInt(MAX_VALUE));
        input.add(valueTuple);

        Tuple nullTuple = tupleFactory.newTuple();
        nullTuple.append(null);
        input.add(nullTuple);
    }

    confirmDistinct();
}
 
Example #10
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs BagToString over a bag of two flat tuples with "-" as the delimiter and expects
 * "a-5-c-6".
 */
@Test
public void testBasicBagToStringUDF() throws Exception {
	TupleFactory tupleFactory = TupleFactory.getInstance();

	Tuple first = tupleFactory.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	Tuple second = tupleFactory.newTuple(2);
	second.set(0, "c");
	second.set(1, 6);

	DataBag bag = BagFactory.getInstance().newDefaultBag();
	bag.add(first);
	bag.add(second);

	// UDF input: the bag followed by the delimiter.
	Tuple udfInput = tupleFactory.newTuple(2);
	udfInput.set(0, bag);
	udfInput.set(1, "-");

	BagToString udf = new BagToString();
	assertEquals("a-5-c-6", udf.exec(udfInput));
}
 
Example #11
Source File: TestSkewedJoin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a skewed self-join over INPUT_FILE5 and drains the result; the test fails if any
 * exception is raised while registering or iterating the join.
 */
@Test
public void testSkewedJoinNullKeys() throws IOException {
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
    try {
        DataBag joined = BagFactory.getInstance().newDefaultBag();
        pigServer.registerQuery("C = join A by id, B by id using 'skewed';");
        for (Iterator<Tuple> rows = pigServer.openIterator("C"); rows.hasNext(); ) {
            joined.add(rows.next());
        }
    } catch(Exception e) {
        System.out.println(e.getMessage());
        e.printStackTrace();
        fail("Should support null keys in skewed join");
    }
}
 
Example #12
Source File: TestBinInterSedes.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a seven-field tuple mixing an int, a map, a null, a long, a float, a nested tuple and
 * a bag, passes it to {@code testTupleSedes}, and checks its formatted string.
 */
@Test
public void testTupleWriteRead1() throws IOException {
        final TupleFactory tupleFactory = TupleFactory.getInstance();

        // Nested values first: a map, an inner tuple, and a bag holding that inner tuple.
        Map<String, String> attributes = new HashMap<String, String>();
        attributes.put("pig", "scalability");

        Tuple nested = tupleFactory.newTuple(1);
        nested.set(0, "innerTuple");

        DataBag nestedBag = BagFactory.getInstance().newDefaultBag();
        nestedBag.add(nested);

        // Tuple with columns of different types.
        Tuple tuplein = tupleFactory.newTuple(7);
        tuplein.set(0, 12);
        tuplein.set(1, attributes);
        tuplein.set(2, null);
        tuplein.set(3, 12L);
        tuplein.set(4, 1.2F);
        tuplein.set(5, nested);
        tuplein.set(6, nestedBag);

        testTupleSedes(tuplein);

        assertEquals(
                "(12,[pig#scalability],,12,1.2,(innerTuple),{(innerTuple)})",
                TupleFormat.format(tuplein));
}
 
Example #13
Source File: ToBag.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps every non-null field of the input tuple in its own one-field tuple and gathers those
 * tuples into a bag; null fields are skipped.
 *
 * @param input the tuple whose fields are gathered
 * @return the bag of wrapped fields
 * @throws RuntimeException wrapping any exception raised while building the bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        final DataBag result = BagFactory.getInstance().newDefaultBag();

        for (int position = 0; position < input.size(); position++) {
            final Object field = input.get(position);
            if (field == null) {
                continue;
            }
            Tuple wrapper = TupleFactory.getInstance().newTuple(1);
            wrapper.set(0, field);
            result.add(wrapper);
        }

        return result;
    } catch (Exception ee) {
        throw new RuntimeException("Error while creating a bag", ee);
    }
}
 
Example #14
Source File: TestPODistinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code input} with MAX_SAMPLES tuples of two random ints, prefixing a null field
 * whenever a further random draw is divisible by three, and then runs
 * {@code confirmDistinct()}.
 */
@Test
public void testPODistictArityWithNullValues() throws ExecException {
    input = BagFactory.getInstance().newDefaultBag();
    final TupleFactory tupleFactory = TupleFactory.getInstance();

    int sample = 0;
    while (sample < MAX_SAMPLES) {
        Tuple row = tupleFactory.newTuple();
        boolean prependNull = r.nextInt(MAX_VALUE) % 3 == 0;
        if (prependNull) {
            row.append(null);
        }
        row.append(r.nextInt(MAX_VALUE));
        row.append(r.nextInt(MAX_VALUE));
        input.add(row);
        sample++;
    }

    confirmDistinct();
}
 
Example #15
Source File: ReservoirSample.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Clears the reservoir, offers it every sample found in the nested bags of the input, and
 * returns the reservoir's contents as a bag wrapped in a tuple.
 *
 * @param input tuple whose first field is a bag of tuples, each holding a bag of samples
 * @return a tuple wrapping the bag of intermediate tuples taken from the reservoir
 * @throws IOException propagated from the tuple operations
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  getReservoir().clear();

  for (Tuple partial : (DataBag) input.get(0)) {
    for (Tuple scored : (DataBag) partial.get(0)) {
      // the score generated earlier travels with the intermediate tuple and is reused
      getReservoir().consider(ScoredTuple.fromIntermediateTuple(scored));
    }
  }

  DataBag merged = BagFactory.getInstance().newDefaultBag();
  for (ScoredTuple candidate : getReservoir()) {
    // the intermediate form carries the score along with the tuple
    merged.add(candidate.getIntermediateTuple(tupleFactory));
  }

  return tupleFactory.newTuple(merged);
}
 
Example #16
Source File: TestLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * For each tuple of the bag in field 0 of the input, emits a two-field tuple made of
 * {@code field0} and the string form of that tuple's first field.
 *
 * @param input tuple whose first field is converted to a bag
 * @return bag of the generated two-field tuples
 * @throws IOException propagated from the tuple operations
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final DataBag result = BagFactory.getInstance().newDefaultBag();
    final DataBag rows = DataType.toBag(input.get(0));
    for (Tuple row : rows) {
        Tuple pair = TupleFactory.getInstance().newTuple(2);
        pair.set(0, field0);
        pair.set(1, row.get(0).toString());
        result.add(pair);
    }
    return result;
}
 
Example #17
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs BagToString with "_" as the delimiter over a bag whose second tuple contains a nested
 * tuple, and expects "a_5_c_6_(d,7)".
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
	TupleFactory tupleFactory = TupleFactory.getInstance();

	Tuple flat = tupleFactory.newTuple(2);
	flat.set(0, "a");
	flat.set(1, 5);

	Tuple inner = tupleFactory.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);

	// Second bag member carries the nested tuple as its third field.
	Tuple withNested = tupleFactory.newTuple(3);
	withNested.set(0, "c");
	withNested.set(1, 6);
	withNested.set(2, inner);

	DataBag inputBag = BagFactory.getInstance().newDefaultBag();
	inputBag.add(flat);
	inputBag.add(withNested);

	Tuple udfInput = tupleFactory.newTuple(2);
	udfInput.set(0, inputBag);
	udfInput.set(1, "_");

	BagToString udf = new BagToString();
	assertEquals("a_5_c_6_(d,7)", udf.exec(udfInput));
}
 
Example #18
Source File: Util.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Sets field {@code i} of {@code t} to a bag containing a single tuple loaded from
 * {@code input[i]}, for every row of {@code input}.
 *
 * @param t     the tuple to populate; it is modified in place
 * @param input one string array per field of {@code t}
 * @return the same tuple {@code t}
 * @throws ExecException propagated from the tuple operations
 */
static public Tuple loadTuple(Tuple t, String[][] input) throws ExecException {
    int column = 0;
    for (String[] row : input) {
        DataBag wrapper = BagFactory.getInstance().newDefaultBag();
        Tuple rowTuple = loadTuple(TupleFactory.getInstance().newTuple(row.length), row);
        wrapper.add(rowTuple);
        t.set(column, wrapper);
        column++;
    }
    return t;
}
 
Example #19
Source File: TestStitch.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stitches a two-tuple bag with a one-tuple bag and checks that the first output tuple is the
 * concatenation of both first tuples while the second output tuple keeps only the fields of
 * the longer bag's second tuple.
 */
@Test
public void testSecondShort() throws Exception {
    Stitch func = new Stitch();

    // First bag: ("a","b") and ("c","d").
    DataBag b1 = BagFactory.getInstance().newDefaultBag();
    Tuple ab = TupleFactory.getInstance().newTuple();
    ab.append("a");
    ab.append("b");
    b1.add(ab);
    Tuple cd = TupleFactory.getInstance().newTuple();
    cd.append("c");
    cd.append("d");
    b1.add(cd);

    // Second, shorter bag: ("1","2") only.
    DataBag b2 = BagFactory.getInstance().newDefaultBag();
    Tuple digits = TupleFactory.getInstance().newTuple();
    digits.append("1");
    digits.append("2");
    b2.add(digits);

    Tuple args = TupleFactory.getInstance().newTuple();
    args.append(b1);
    args.append(b2);

    DataBag out = func.exec(args);
    assertEquals(2, out.size());
    Iterator<Tuple> iter = out.iterator();

    Tuple stitched = iter.next();
    assertEquals(4, stitched.size());
    assertEquals("a", stitched.get(0));
    assertEquals("b", stitched.get(1));
    assertEquals("1", stitched.get(2));
    assertEquals("2", stitched.get(3));

    Tuple unmatched = iter.next();
    assertEquals(2, unmatched.size());
    assertEquals("c", unmatched.get(0));
    assertEquals("d", unmatched.get(1));
}
 
Example #20
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Ignores the input and returns a bag holding three separate tuples, each built from "a".
 *
 * @param input unused
 * @return a bag with three tuples created from the string "a"
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final DataBag result = BagFactory.getInstance().newDefaultBag();
    for (int copy = 0; copy < 3; copy++) {
        result.add(tupleFactory.newTuple("a"));
    }
    return result;
}
 
Example #21
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an 11-field tuple whose first ten fields hold, in order: a tuple, a bag, a map, an
 * Integer, a Long, a Float, a Double, a Boolean, a DataByteArray and a String. Field 10 is
 * never set here.
 *
 * @return the populated tuple
 * @throws Exception propagated from the tuple operations
 */
private Tuple giveMeOneOfEach() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(11);
    Tuple t2 = tf.newTuple(2);

    // valueOf factories and plain literals replace the deprecated boxed-type constructors
    // and the redundant new String(...) copies.
    t2.set(0, Integer.valueOf(3));
    t2.set(1, Float.valueOf(3.0f));

    DataBag bag = BagFactory.getInstance().newDefaultBag();
    // Keep the argument boxed: a bare int literal would select newTuple(int), the
    // size-taking overload used above, instead of wrapping the value.
    bag.add(tf.newTuple(Integer.valueOf(4)));
    bag.add(tf.newTuple("mary had a little lamb"));

    Map<String, Object> map = new LinkedHashMap<String, Object>(2);
    map.put("hello", "world");
    map.put("goodbye", "all");

    t1.set(0, t2);
    t1.set(1, bag);
    t1.set(2, map);
    t1.set(3, Integer.valueOf(42));
    t1.set(4, Long.valueOf(5000000000L));
    // Narrow from the double literal so the stored float is the same value as before.
    t1.set(5, Float.valueOf((float) 3.141592654));
    t1.set(6, Double.valueOf(2.99792458e8));
    t1.set(7, Boolean.TRUE);
    t1.set(8, new DataByteArray("hello"));
    t1.set(9, "goodbye");

    return t1;
}
 
Example #22
Source File: TestMapSideCogroup.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Merge-cogroups INPUT_FILE1 with EMPTY_FILE and checks that exactly three groups come back,
 * each pairing the left-side rows with an empty bag.
 */
@Test
public void testEmptyDeltaFile() throws Exception{

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' using "+ DummyCollectableLoader.class.getName() +"() as (c1:chararray,c2:int);");
    pigServer.registerQuery("B = LOAD '" + EMPTY_FILE + "' using "+ DummyIndexableLoader.class.getName()   +"() as (c1:chararray,c2:int);");

    DataBag dbMergeCogrp = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("C = cogroup A by c1, B by c1 using 'merge';");
    Iterator<Tuple> iter = pigServer.openIterator("C");

    while(iter.hasNext()) {
        Tuple t = iter.next();
        dbMergeCogrp.add(t);
    }

    String[] results = new String[]{
            "(1,{(1,1),(1,2),(1,3)},{})",
            "(2,{(2,1),(2,2),(2,3)},{})",
            "(3,{(3,1),(3,2),(3,3)},{})"
    };

    assertEquals(3, dbMergeCogrp.size());
    Iterator<Tuple> itr = dbMergeCogrp.iterator();
    for(int i=0; i<3; i++){
        // assertEquals takes (expected, actual); keeping that order makes a failure message
        // label the two values correctly.
        assertEquals(results[i], itr.next().toString());
    }
    assertFalse(itr.hasNext());
}
 
Example #23
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a seven-field tuple mixing an int, a map, a null, a long, a float, a nested tuple and
 * a bag, and checks the string produced by {@code TupleFormat.format}. An ExecException
 * during construction fails the test.
 */
@Test
public void testTupleFormat() {

    try {
        // Nested values first: a map, an inner tuple, and a bag holding that inner tuple.
        Map<String, String> attributes = new HashMap<String, String>();
        attributes.put("pig", "scalability");

        Tuple nested = mTupleFactory.newTuple(1);
        nested.set(0, "innerTuple");

        DataBag nestedBag = BagFactory.getInstance().newDefaultBag();
        nestedBag.add(nested);

        Tuple subject = mTupleFactory.newTuple(7);
        subject.set(0, 12);
        subject.set(1, attributes);
        subject.set(2, null);
        subject.set(3, 12L);
        subject.set(4, 1.2F);
        subject.set(5, nested);
        subject.set(6, nestedBag);

        assertEquals(
                "(12,[pig#scalability],,12,1.2,(innerTuple),{(innerTuple)})",
                TupleFormat.format(subject));
    } catch (ExecException e) {
        e.printStackTrace();
        fail();
    }

}
 
Example #24
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a bag containing the first input field, wrapped in a tuple, when that field is an
 * Integer greater than 10; otherwise returns an empty bag.
 *
 * @param input tuple whose first field is cast to Integer
 * @return a bag with at most one single-field tuple; empty for a null field
 * @throws IOException propagated from reading the input field
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    Integer content = (Integer)input.get(0);
    DataBag bag = BagFactory.getInstance().newDefaultBag();

    // Guard the comparison: unboxing a null Integer would throw a NullPointerException.
    if (content != null && content > 10) {
        Tuple t = TupleFactory.getInstance().newTuple();
        t.append(content);
        bag.add(t);
    }
    return bag;
}
 
Example #25
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Ignores the input and returns a fixed map with one entry per value kind: string, int, long,
 * float, double, byte array, nested map, tuple and bag.
 *
 * @param input unused
 * @return the populated map
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    // valueOf factories and plain literals replace the deprecated boxed-type constructors
    // and the redundant new String(...) copies throughout.
    TupleFactory tupleFactory = TupleFactory.getInstance();
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example #26
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a one-tuple bag holding two maps with BinStorage, loads it back through Pig, and
 * checks the values looked up with the '#' operator.
 */
@Test
public void testMapLookup() throws Exception {
    Map<String, Object> colors = new HashMap<String, Object>();
    colors.put("apple","red");
    colors.put("orange","orange");

    Map<String, Object> weights = new HashMap<String, Object>();
    weights.put("apple","0.1");
    weights.put("orange","0.3");

    Tuple row = mTf.newTuple();
    row.append(colors);
    row.append(weights);

    DataBag bag = BagFactory.getInstance().newDefaultBag();
    bag.add(row);

    // Only the generated name is needed, so the temp file itself is deleted right away.
    File tempF = File.createTempFile("tmp", "");
    tempF.delete();
    String fileName = Util.removeColon(tempF.getCanonicalPath());

    PigFile target = new PigFile(fileName);
    target.store(bag, new FuncSpec(BinStorage.class.getCanonicalName()), pigServer.getPigContext());

    pigServer.registerQuery("a = load '" + Util.encodeEscape(fileName) + "' using BinStorage();");
    pigServer.registerQuery("b = foreach a generate $0#'apple',flatten($1#'orange');");

    Iterator<Tuple> iter = pigServer.openIterator("b");
    Tuple looked = iter.next();
    Assert.assertEquals(looked.get(0).toString(), "red");
    Assert.assertEquals(DataType.toDouble(looked.get(1)), 0.3);
    Assert.assertFalse(iter.hasNext());
}
 
Example #27
Source File: TestFRJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every tuple of the {@code repl} file with PigStorage and groups them in
 * {@code replTbl}: the key is field {@code keyField} converted to a string, and the value is
 * a bag of (key, integer form of field 1) tuples.
 *
 * @throws IOException propagated from connecting, loading or converting the data
 */
private void setUpHashTable() throws IOException {
    FileSpec replFile = new FileSpec(repl, new FuncSpec(PigStorage.class.getName() + "()"));
    POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
    PigContext pc = new PigContext(ExecType.MAPREDUCE, PigMapReduce.sJobConfInternal.get());
    pc.connect();

    ld.setPc(pc);

    // Nothing in the loop changes the loader's func spec, so the load function is
    // instantiated once up front instead of on every tuple.
    LoadFunc lf = ((LoadFunc)PigContext.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));

    for (Result res = ld.getNextTuple(); res.returnStatus != POStatus.STATUS_EOP; res = ld
            .getNextTuple()) {
        Tuple tup = (Tuple)res.result;
        String key = lf.getLoadCaster().bytesToCharArray(
                ((DataByteArray)tup.get(keyField)).get());
        Tuple csttup = TupleFactory.getInstance().newTuple(2);
        csttup.set(0, key);
        csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray)tup.get(1)).get()));

        // One lookup suffices: a missing key comes back as null and gets a fresh bag.
        DataBag vals = replTbl.get(key);
        if (vals == null) {
            vals = BagFactory.getInstance().newDefaultBag();
            replTbl.put(key, vals);
        }
        vals.add(csttup);
    }
}
 
Example #28
Source File: SessionTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given timestamps in a bag of single-field tuples (each holding the time in
 * milliseconds) and returns that bag as the only field of a new tuple.
 *
 * @param dt the timestamps to include, in order
 * @return a one-field tuple containing the bag
 * @throws Exception propagated from the tuple operations
 */
private static Tuple buildInputBag(DateTime ...dt) throws Exception
{
  DataBag inputBag = BagFactory.getInstance().newDefaultBag();
  for (int index = 0; index < dt.length; index++)
  {
    long millis = dt[index].getMillis();
    inputBag.add(TupleFactory.getInstance().newTuple(Collections.singletonList(millis)));
  }

  Tuple input = TupleFactory.getInstance().newTuple(1);
  input.set(0,inputBag);
  return input;
}
 
Example #29
Source File: TestSkewedJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a skewed join keyed on a map lookup over INPUT_FILE4 and drains the result into a bag;
 * the test passes as long as nothing throws.
 */
@Test
public void testSkewedJoinMapKey() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE4 + "' as (m:[]);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE4 + "' as (n:[]);");
    DataBag joined = BagFactory.getInstance().newDefaultBag();
    pigServer.registerQuery("C = join A by (chararray)m#'a100', B by (chararray)n#'a100' using 'skewed' parallel 20;");

    for (Iterator<Tuple> rows = pigServer.openIterator("C"); rows.hasNext(); ) {
        joined.add(rows.next());
    }
}
 
Example #30
Source File: TestSkewedJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a three-way skewed join and tries to iterate it; the test expects a
 * FrontendException to be thrown.
 */
@Test(expected = FrontendException.class)
public void testSkewedJoin3Way() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
    pigServer.registerQuery("C = LOAD '" + INPUT_FILE3 + "' as (id, name);");
    DataBag joined = BagFactory.getInstance().newDefaultBag();
    pigServer.registerQuery("D = join A by id, B by id, C by id using 'skewed' parallel 5;");

    for (Iterator<Tuple> rows = pigServer.openIterator("D"); rows.hasNext(); ) {
        joined.add(rows.next());
    }
}