Java Code Examples for org.apache.pig.data.Tuple#getAll()

The following examples show how to use org.apache.pig.data.Tuple#getAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestForEachStar.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code foreach ... generate *} over a load that declares no schema and
 * checks that the first output tuple equals ('one','two') with every field
 * wrapped as a {@link DataByteArray}.
 */
@Test
public void testForeachStarSchemaUnkown() throws IOException, ParserException{
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query = "  l1 = load '" + INPUT_FILE + "' ;" + "f1 = foreach l1 generate * ;";
    Util.registerMultiLineQuery(pig, query);
    pig.explain("f1", System.out);
    Iterator<Tuple> results = pig.openIterator("f1");

    // The constant parses as chararrays; re-wrap each field as a bytearray
    // to build the tuple the assertion compares against.
    Tuple charArrayConstants = (Tuple) Util.getPigConstant("('one','two')");
    Tuple expectedRes = TupleFactory.getInstance().newTuple();
    for (Object constant : charArrayConstants.getAll()) {
        String text = constant.toString();
        expectedRes.append(new DataByteArray(text));
    }

    assertTrue("has output", results.hasNext());
    assertEquals(expectedRes, results.next());
}
 
Example 2
Source File: TupleUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns each field of {@code src} to the same position in {@code dest}.
 * Field objects are passed to {@code dest.set} as-is (no copy is made here).
 *
 * @param src  tuple whose fields are read via {@code getAll()}
 * @param dest tuple that receives the fields by position
 * @throws ExecException if {@code dest.set} fails
 */
public static void copy(Tuple src, Tuple dest) throws ExecException
{
    int position = 0;
    for (Object field : src.getAll())
    {
        dest.set(position, field);
        position++;
    }
}
 
Example 3
Source File: TupleUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns to each position of {@code dest} the result of
 * {@code getFieldDeepCopy} applied to the field at that position in {@code src}.
 *
 * @param src  tuple whose fields are read via {@code getAll()}
 * @param dest tuple that receives the copied fields by position
 * @throws ExecException if copying a field or {@code dest.set} fails
 */
public static void deepCopy(Tuple src, Tuple dest) throws ExecException
{
    int position = 0;
    for (Object field : src.getAll())
    {
        Object copied = getFieldDeepCopy(field);
        dest.set(position, copied);
        position++;
    }
}
 
Example 4
Source File: TupleUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the fields of {@code src} in order and hands each one, together with
 * its position and {@code dest}, to {@code deepFieldCopyWithReuse}.
 *
 * @param src  tuple whose fields are read via {@code getAll()}
 * @param dest tuple passed to {@code deepFieldCopyWithReuse} as the target
 * @throws ExecException if the per-field copy fails
 */
public static void deepCopyWithReuse(Tuple src, Tuple dest) throws ExecException
{
    int position = 0;
    for (Object field : src.getAll())
    {
        deepFieldCopyWithReuse(position, field, dest);
        position++;
    }
}
 
Example 5
Source File: PigJrubyLibrary.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig tuple into a Ruby array by converting every field, in order,
 * with the {@code pigToRuby(Ruby, Object)} overload.
 *
 * @param  ruby          the Ruby runtime to create objects in
 * @param  object        tuple to convert
 * @return               Ruby array holding the converted fields
 * @throws ExecException object contained an object that could not convert
 */
public static RubyArray pigToRuby(Ruby ruby, Tuple object) throws ExecException{
    RubyArray converted = ruby.newArray();
    for (Object field : object.getAll()) {
        converted.add(pigToRuby(ruby, field));
    }
    return converted;
}
 
Example 6
Source File: JythonUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code PyTuple} from a Pig tuple by passing each field, in order,
 * through {@code pigToPython}.
 *
 * @param tuple Pig tuple to convert
 * @return a {@code PyTuple} with one converted element per field
 */
public static PyTuple pigTupleToPyTuple(Tuple tuple) {
    PyObject[] converted = new PyObject[tuple.size()];
    int next = 0;
    for (Object field : tuple.getAll()) {
        converted[next] = pigToPython(field);
        next++;
    }
    return new PyTuple(converted);
}
 
Example 7
Source File: CubeDimensions.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces every null field of {@code tuple}, in place, with the
 * {@code unknown} value.
 *
 * @param tuple tuple whose null fields are overwritten
 * @throws ExecException if {@code tuple.set} fails
 */
public static void convertNullToUnknown(Tuple tuple) throws ExecException {
    int position = 0;
    for (Object field : tuple.getAll()) {
        boolean missing = (field == null);
        if (missing) {
            tuple.set(position, unknown);
        }
        position++;
    }
}
 
Example 8
Source File: TestHelper.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new tuple whose fields are the fields of {@code t} with leading
 * and trailing whitespace removed from their string form.
 *
 * <p>Every field of {@code t} is cast to {@link DataByteArray}; a field of any
 * other type causes a {@link ClassCastException}.
 *
 * @param t tuple of {@code DataByteArray} fields
 * @return a fresh tuple holding the trimmed fields in the same order
 */
private static Tuple trimTuple(Tuple t){
    Tuple ret = TupleFactory.getInstance().newTuple();
    for (Object o : t.getAll()) {
        DataByteArray dba = (DataByteArray)o;
        // Build the result from the String directly rather than through
        // String.getBytes(), which encodes with the platform default charset
        // and can corrupt non-ASCII data on non-UTF-8 JVMs. The String
        // constructor keeps encoding and decoding symmetric inside
        // DataByteArray.
        ret.append(new DataByteArray(dba.toString().trim()));
    }
    return ret;
}
 
Example 9
Source File: TestMultiQueryLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a copy of {@code f} with {@code suffix} appended as an extra
 * trailing field. An {@link InterruptedException} from the writer is rethrown
 * wrapped in an {@link IOException}.
 */
@Override
public void putNext(Tuple f) throws IOException {
    // Assemble the outgoing tuple first; only the write itself can be interrupted.
    Tuple withSuffix = TupleFactory.getInstance().newTuple();
    for (Object field : f.getAll()) {
        withSuffix.append(field);
    }
    withSuffix.append(suffix);

    try {
        writer.write(null, withSuffix);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
 
Example 10
Source File: TestJoin.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a relation loaded without a schema (then projected with aliases)
 * against one loaded with a schema, and checks that the single joined row can
 * be projected both by plain alias and by disambiguated {@code a::}/{@code b::}
 * alias.
 */
@Test
public void testJoinSchema2() throws Exception {
    // test join where one load does not have schema
    ExecType execType = ExecType.LOCAL;
    setUp(execType );

    String[] leftRows = { "1\t2", "2\t3", "3\t4" };
    String[] rightRows = { "1\thello", "4\tbye", };
    String firstInput = createInputFile(execType, "a.txt", leftRows);
    String secondInput = createInputFile(execType, "b.txt", rightRows);

    // Expected row, built as bytearrays from a chararray constant.
    Tuple charArrayConstants =
        (Tuple)Util.getPigConstant("('1','2','1','hello','1','2','1','hello')");
    Tuple expectedResult = TupleFactory.getInstance().newTuple();
    for (Object constant : charArrayConstants.getAll()) {
        expectedResult.append(new DataByteArray(constant.toString()));
    }

    // with schema
    String loadFirst = "a = load '"+ Util.encodeEscape(firstInput) +"' ; ";
    //re-using alias a for new operator below, doing this intentionally
    // because such use case has been seen
    String projectFirst = "a = foreach a generate $0 as i, $1 as j ;";
    String loadSecond = "b = load '"+ Util.encodeEscape(secondInput) +"' as (k, l); ";
    String join = "c = join a by $0, b by $0;";
    String projectJoined =
        "d = foreach c generate i,j,k,l,a::i as ai,a::j as aj,b::k as bk,b::l as bl;";
    String script = loadFirst + projectFirst + loadSecond + join + projectJoined;
    Util.registerMultiLineQuery(pigServer, script);

    // Exactly one joined row is expected.
    Iterator<Tuple> joined = pigServer.openIterator("d");
    assertTrue(joined.hasNext());
    Tuple actual = joined.next();
    assertEquals(expectedResult, actual);
    assertFalse(joined.hasNext());

    deleteInputFile(execType, firstInput);
    deleteInputFile(execType, secondInput);
}
 
Example 11
Source File: TestCombiner.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a nested foreach mixing distinct and non-distinct aggregates
 * produces a combine plan in the explain output, and that every field of
 * every output row matches the expected per-group values.
 */
@Test
public void testDistinctAggs1() throws Exception {
    // test the use of combiner for distinct aggs:
    String input[] = {
                    "pig1\t18\t2.1",
                    "pig2\t24\t3.3",
                    "pig5\t45\t2.4",
                    "pig1\t18\t2.1",
                    "pig1\t19\t2.1",
                    "pig2\t24\t4.5",
                    "pig1\t20\t3.1" };
    Util.createInputFile(cluster, "distinctAggs1Input.txt", input);

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b  {" +
            "        x = distinct a.age;" +
            "        y = distinct a.gpa;" +
            "        z = distinct a;" +
            "        generate group, COUNT(x), SUM(x.age), SUM(y.gpa), SUM(a.age), " +
            "                       SUM(a.gpa), COUNT(z.age), COUNT(z), SUM(z.age);};");

    // make sure there is a combine plan in the explain output
    ByteArrayOutputStream explainBuffer = new ByteArrayOutputStream();
    PrintStream explainOut = new PrintStream(explainBuffer);
    pigServer.explain("c", explainOut);
    String explainText = explainBuffer.toString();
    assertTrue(explainText.matches("(?si).*combine plan.*"));

    // Expected output rows, keyed by group name.
    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", 3L, 57L, 5.2, 75L, 9.4, 3L, 3L, 57L });
    results.put("pig2", new Object[] { "pig2", 1L, 24L, 7.8, 48L, 7.8, 2L, 2L, 48L });
    results.put("pig5", new Object[] { "pig5", 1L, 45L, 2.4, 45L, 2.4, 1L, 1L, 45L });

    Iterator<Tuple> rows = pigServer.openIterator("c");
    while (rows.hasNext()) {
        List<Object> fields = rows.next().getAll();
        Object[] expected = results.get(fields.get(0));
        for (int col = 0; col < fields.size(); col++) {
            assertEquals(expected[col], fields.get(col));
        }
    }

    Util.deleteFile(cluster, "distinctAggs1Input.txt");
    pigServer.shutdown();
}
 
Example 12
Source File: TestCombiner.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that no combine plan is generated when one of the generated
 * expressions is a bare {@code distinct} relation, and that the group key and
 * the two sums of every output row match the expected values. The bag in the
 * second column is not compared.
 */
@Test
public void testDistinctNoCombiner() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // is a distinct() as the leaf
    String input[] = {
                    "pig1\t18\t2.1",
                    "pig2\t24\t3.3",
                    "pig5\t45\t2.4",
                    "pig1\t18\t2.1",
                    "pig1\t19\t2.1",
                    "pig2\t24\t4.5",
                    "pig1\t20\t3.1" };

    Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input);
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b  {" +
            "        z = distinct a;" +
            "        generate group, z, SUM(a.age), SUM(a.gpa);};");

    // make sure there is NO combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertFalse(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 });
    results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 });
    results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 });
    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        // Walk by index so that skipping the bag column still advances to the
        // following columns. Previously the index was only incremented on the
        // assert path, so it stayed at 1 after the skip and the two SUM
        // columns were never compared.
        for (int i = 0; i < fields.size(); i++) {
            if (i == 1) {
                // ignore the second field which is a bag
                // for comparison here
                continue;
            }
            assertEquals(expected[i], fields.get(i));
        }
    }
    Util.deleteFile(cluster, "distinctNoCombinerInput.txt");
    pigServer.shutdown();
}
 
Example 13
Source File: TestCombiner.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that no combine plan is generated when one of the generated
 * expressions is a plain bag projection, and that the group key and the two
 * sums of every output row match the expected values. The bag in the second
 * column is not compared.
 */
@Test
public void testForEachNoCombiner() throws Exception {
    // test that combiner is NOT invoked when
    // one of the elements in the foreach generate
    // has a foreach in the plan without a distinct agg
    String input[] = {
                    "pig1\t18\t2.1",
                    "pig2\t24\t3.3",
                    "pig5\t45\t2.4",
                    "pig1\t18\t2.1",
                    "pig1\t19\t2.1",
                    "pig2\t24\t4.5",
                    "pig1\t20\t3.1" };

    Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b  {" +
            "        z = a.age;" +
            "        generate group, z, SUM(a.age), SUM(a.gpa);};");

    // make sure there is NO combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertFalse(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", "bag-place-holder", 75L, 9.4 });
    results.put("pig2", new Object[] { "pig2", "bag-place-holder", 48L, 7.8 });
    results.put("pig5", new Object[] { "pig5", "bag-place-holder", 45L, 2.4 });
    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        // Walk by index so that skipping the bag column still advances to the
        // following columns. Previously the index was only incremented on the
        // assert path, so it stayed at 1 after the skip and the two SUM
        // columns were never compared.
        for (int i = 0; i < fields.size(); i++) {
            if (i == 1) {
                // ignore the second field which is a bag
                // for comparison here
                continue;
            }
            assertEquals(expected[i], fields.get(i));
        }
    }
    Util.deleteFile(cluster, "forEachNoCombinerInput.txt");
    pigServer.shutdown();
}
 
Example 14
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Cogroups a DISTINCT-ed single-column relation (inner) with a two-column
 * relation and compares every field of every output row with the expected
 * tuple for that row's group key.
 */
@Test
public void testCogroupAfterDistinct() throws Exception {
    String[] input1 = {
            "abc",
            "abc",
            "def",
            "def",
            "def",
            "abc",
            "def",
            "ghi"
            };
    String[] input2 = {
        "ghi	4",
        "rst	12344",
        "uvw	1",
        "xyz	4141"
        };
    Util.createInputFile(cluster, "table1", input1);
    Util.createInputFile(cluster, "table2", input2);

    pigServer.registerQuery("nonuniqtable1 = LOAD 'table1' AS (f1:chararray);");
    pigServer.registerQuery("table1 = DISTINCT nonuniqtable1;");
    pigServer.registerQuery("table2 = LOAD 'table2' AS (f1:chararray, f2:int);");
    pigServer.registerQuery("temp = COGROUP table1 BY f1 INNER, table2 BY f1;");
    Iterator<Tuple> rows = pigServer.openIterator("temp");

    // results should be:
    // (abc,{(abc)},{})
    // (def,{(def)},{})
    // (ghi,{(ghi)},{(ghi,4)})
    HashMap<String, Tuple> results = new HashMap<String, Tuple>();

    Object[] abcRow = new Object[] { "abc",
            Util.createBagOfOneColumn(new String[] { "abc"}), mBf.newDefaultBag() };
    results.put("abc", Util.createTuple(abcRow));

    Object[] defRow = new Object[] { "def",
            Util.createBagOfOneColumn(new String[] { "def"}), mBf.newDefaultBag() };
    results.put("def", Util.createTuple(defRow));

    // Only "ghi" has a match in table2, so its third column is a non-empty bag.
    Tuple ghiMatch = Util.createTuple(new Object[] { "ghi", 4 });
    Object[] ghiRow = new Object[] { "ghi",
            Util.createBagOfOneColumn(new String[] { "ghi"}), Util.createBag(new Tuple[] { ghiMatch })};
    results.put("ghi", Util.createTuple(ghiRow));

    while (rows.hasNext()) {
        List<Object> fields = rows.next().getAll();
        Tuple expected = results.get((String)fields.get(0));
        for (int col = 0; col < fields.size(); col++) {
            Assert.assertEquals(expected.get(col), fields.get(col));
        }
    }

    Util.deleteFile(cluster, "table1");
    Util.deleteFile(cluster, "table2");
}
 
Example 15
Source File: Stitch.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Stitches the input bags together row by row: the n-th output tuple is the
 * concatenation of the fields of the n-th tuple of each input bag.
 *
 * <p>The first bag drives the iteration, so the output has as many tuples as
 * the first bag; a later bag that runs out early simply contributes nothing
 * to the remaining rows. A single input bag is returned unchanged.
 *
 * @param input tuple whose fields are the bags to stitch
 * @return the stitched bag, or {@code null} if {@code input} is null or empty
 * @throws IOException (as {@code ExecException}) if an argument is not a bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {

    if (input == null || input.size() == 0) {
        return null;
    }

    List<DataBag> bags = new ArrayList<DataBag>(input.size());
    for (int i = 0; i < input.size(); i++) {
        Object o = input.get(i);
        // A null argument passes the check, exactly as a cast of null would.
        if (o != null && !(o instanceof DataBag)) {
            int errCode = 2107; // TODO not sure this is the right one
            String msg = "Stitch expected bags as input but argument " +
                i + " is a " + DataType.findTypeName(o);
            throw new ExecException(msg, errCode, PigException.INPUT);
        }
        bags.add((DataBag)o);
    }

    if (bags.size() == 1) {
        return bags.get(0);
    }

    DataBag output = BagFactory.getInstance().newDefaultBag();
    List<Iterator<Tuple>> iters = new ArrayList<Iterator<Tuple>>(bags.size());
    for (DataBag bag : bags) {
        iters.add(bag.iterator());
    }

    // The first bag's iterator decides how many output rows are produced.
    Iterator<Tuple> lead = iters.get(0);
    while (lead.hasNext()) {
        Tuple outTuple = TupleFactory.getInstance().newTuple();
        for (Iterator<Tuple> iter : iters) {
            if (!iter.hasNext()) {
                continue;
            }
            for (Object field : iter.next().getAll()) {
                outTuple.append(field);
            }
        }
        output.add(outTuple);
    }
    return output;
}