Java Code Examples for org.apache.pig.data.TupleFactory#newTuple()

The following examples show how to use org.apache.pig.data.TupleFactory#newTuple(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestProject.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects every adjacent column pair {@code (j, j + 1)} of {@code tRandomAndNull} and checks
 * that each projection reports {@code STATUS_OK} and yields a tuple holding exactly those two
 * fields.
 */
@Test
public void testGetNextMultipleProjectionsWithNull() throws ExecException, IOException {
    t = tRandomAndNull;
    ArrayList<Integer> columns = new ArrayList<Integer>();
    TupleFactory factory = TupleFactory.getInstance();
    proj.attachInput(t);

    for (int first = 0; first < t.size() - 1; first++) {
        int second = first + 1;

        // Re-attach the input and project the current pair of columns.
        proj.attachInput(t);
        columns.add(first);
        columns.add(second);
        proj.setColumns(columns);
        res = proj.getNext();

        // Build the expected tuple straight from the input fields.
        ArrayList<Object> expectedFields = new ArrayList<Object>();
        expectedFields.add(t.get(first));
        expectedFields.add(t.get(second));
        Tuple expected = factory.newTuple(expectedFields);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expected, res.result);

        // The same list instance is reused for the next pair.
        columns.clear();
    }
}
 
Example 2
Source File: RegExLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reads records from {@code in} until one matches the pattern returned by {@code getPattern()},
 * then returns that record's capture groups as a tuple of {@code DataByteArray} fields.
 *
 * @return a tuple with one field per capture group of the first matching line, or {@code null}
 *         when {@code in} has no further records
 */
@Override
public Tuple getNext() throws IOException {
  Pattern pattern = getPattern();
  Matcher matcher = pattern.matcher("");
  TupleFactory tupleFactory = DefaultTupleFactory.getInstance();

  while (in.nextKeyValue()) {
    String line = in.getCurrentValue().toString();
    // Strip a trailing carriage return so it does not become part of the last group.
    if (line.endsWith("\r")) {
      line = line.substring(0, line.length() - 1);
    }

    // Reuse the one matcher for every line; lines that do not match are skipped.
    matcher.reset(line);
    if (!matcher.find()) {
      continue;
    }

    // NOTE(review): matcher.group(i) is null for a group that did not participate in the
    // match; confirm DataByteArray(String) tolerates that.
    ArrayList<DataByteArray> groups = new ArrayList<DataByteArray>();
    for (int group = 1; group <= matcher.groupCount(); group++) {
      groups.add(new DataByteArray(matcher.group(group)));
    }
    return tupleFactory.newTuple(groups);
  }
  return null;
}
 
Example 3
Source File: TestProject.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects every adjacent column pair {@code (j, j + 1)} of {@code tRandom} and checks that each
 * projection reports {@code STATUS_OK} and yields a tuple holding exactly those two fields.
 */
@Test
public void testGetNextMultipleProjections() throws ExecException, IOException {
    t = tRandom;
    ArrayList<Integer> projected = new ArrayList<Integer>();
    TupleFactory tuples = TupleFactory.getInstance();
    proj.attachInput(t);

    int left = 0;
    while (left < t.size() - 1) {
        int right = left + 1;

        // Re-attach the input and project columns (left, right).
        proj.attachInput(t);
        projected.add(left);
        projected.add(right);
        proj.setColumns(projected);
        res = proj.getNext();

        // The expected tuple is the same two fields read directly from the input.
        ArrayList<Object> wanted = new ArrayList<Object>();
        wanted.add(t.get(left));
        wanted.add(t.get(right));
        Tuple expectedResult = tuples.newTuple(wanted);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expectedResult, res.result);

        projected.clear();
        left++;
    }
}
 
Example 4
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Passes {@code BagToString} a bag of two 2-field tuples with "-" as the delimiter and expects
 * the flattened string {@code "a-5-c-6"}.
 */
@Test
public void testBasicBagToStringUDF() throws Exception {
	TupleFactory tupleFactory = TupleFactory.getInstance();

	Tuple first = tupleFactory.newTuple(2);
	first.set(0, "a");
	first.set(1, 5);

	Tuple second = tupleFactory.newTuple(2);
	second.set(0, "c");
	second.set(1, 6);

	DataBag inputBag = BagFactory.getInstance().newDefaultBag();
	inputBag.add(first);
	inputBag.add(second);

	// UDF input layout: field 0 is the bag, field 1 is the delimiter.
	Tuple udfInput = tupleFactory.newTuple(2);
	udfInput.set(0, inputBag);
	udfInput.set(1, "-");

	String joined = new BagToString().exec(udfInput);
	assertEquals("a-5-c-6", joined);
}
 
Example 5
Source File: TestPODistinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code input} with {@code MAX_SAMPLES} pairs of single-field tuples, one holding a
 * random int and one holding {@code null}, then delegates the check to
 * {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithIntAndNullValues() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    input = BagFactory.getInstance().newDefaultBag();

    int sample = 0;
    while (sample < MAX_SAMPLES) {
        Tuple intTuple = tupleFactory.newTuple();
        intTuple.append(r.nextInt(MAX_VALUE));
        input.add(intTuple);

        Tuple nullTuple = tupleFactory.newTuple();
        nullTuple.append(null);
        input.add(nullTuple);

        sample++;
    }

    confirmDistinct();
}
 
Example 6
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an 11-field tuple whose first ten fields each hold a different kind of value: a nested
 * tuple, a bag, a map, an Integer, a Long, a Float, a Double, a Boolean, a DataByteArray and a
 * String. Index 10 is never set.
 *
 * @return the populated tuple
 */
private Tuple giveMeOneOfEach() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(11);
    Tuple t2 = tf.newTuple(2);

    // The boxed-type constructors are deprecated; valueOf yields equal values.
    t2.set(0, Integer.valueOf(3));
    t2.set(1, Float.valueOf(3.0f));

    DataBag bag = BagFactory.getInstance().newDefaultBag();
    // The argument must stay an Integer: a bare int literal would select newTuple(int size).
    bag.add(tf.newTuple(Integer.valueOf(4)));
    bag.add(tf.newTuple("mary had a little lamb"));

    Map<String, Object> map = new LinkedHashMap<String, Object>(2);
    map.put("hello", "world");
    map.put("goodbye", "all");

    t1.set(0, t2);
    t1.set(1, bag);
    t1.set(2, map);
    t1.set(3, Integer.valueOf(42));
    t1.set(4, Long.valueOf(5000000000L));
    // Narrow the double literal explicitly, exactly as the old Float(double) constructor did.
    t1.set(5, Float.valueOf((float) 3.141592654));
    t1.set(6, Double.valueOf(2.99792458e8));
    t1.set(7, Boolean.TRUE);
    t1.set(8, new DataByteArray("hello"));
    t1.set(9, "goodbye");
    // NOTE(review): index 10 is left unset, presumably to cover the null case; confirm.

    return t1;
}
 
Example 7
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a tuple holding an {@code InternalMap} to a temporary file with {@code Tuple.write},
 * reads it back with {@code Tuple.readFields}, and checks that the map and its entries survive
 * the round trip.
 */
@Test
public void testReadWriteInternal() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(1);

    InternalMap map = new InternalMap(2);
    map.put(Integer.valueOf(1), "world");
    map.put(Long.valueOf(3L), "all");
    t1.set(0, map);

    File file = File.createTempFile("Tuple", "put");
    try {
        FileOutputStream fos = new FileOutputStream(file);
        try {
            DataOutput out = new DataOutputStream(fos);
            t1.write(out);
        } finally {
            // Close even when the write fails so the file handle is not leaked.
            fos.close();
        }

        Tuple after = tf.newTuple();
        FileInputStream fis = new FileInputStream(file);
        try {
            DataInput in = new DataInputStream(fis);
            after.readFields(in);
        } finally {
            // The original never closed the input stream.
            fis.close();
        }

        Object o = after.get(0);
        assertTrue("isa InternalMap", o instanceof InternalMap);

        InternalMap m = (InternalMap)o;
        assertEquals("world", (String)m.get(Integer.valueOf(1)));
        assertEquals("all", (String)m.get(Long.valueOf(3L)));
        assertNull(m.get("fred"));
    } finally {
        // Remove the temp file even when an assertion or I/O call fails.
        file.delete();
    }
}
 
Example 8
Source File: TestPODistinct.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code input} with {@code MAX_SAMPLES} two-field tuples of the form
 * {@code (null, random int)}, then delegates the check to {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithNullIntValues() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    input = BagFactory.getInstance().newDefaultBag();

    for (int remaining = MAX_SAMPLES; remaining > 0; remaining--) {
        Tuple sample = tupleFactory.newTuple();
        sample.append(null);
        sample.append(r.nextInt(MAX_VALUE));
        input.add(sample);
    }

    confirmDistinct();
}
 
Example 9
Source File: TestPODistinct.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code input} with {@code MAX_SAMPLES} two-field tuples of the form
 * {@code (random int, null)}, then delegates the check to {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithIntNullValues() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    input = BagFactory.getInstance().newDefaultBag();

    int added = 0;
    while (added < MAX_SAMPLES) {
        Tuple sample = tupleFactory.newTuple();
        sample.append(r.nextInt(MAX_VALUE));
        sample.append(null);
        input.add(sample);
        added++;
    }

    confirmDistinct();
}
 
Example 10
Source File: TestPODistinct.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code input} with {@code MAX_SAMPLES} single-field tuples that each hold {@code null},
 * then delegates the check to {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithNullValues() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    input = BagFactory.getInstance().newDefaultBag();

    for (int sample = 0; sample != MAX_SAMPLES && sample < MAX_SAMPLES; sample++) {
        Tuple nullOnly = tupleFactory.newTuple();
        nullOnly.append(null);
        input.add(nullOnly);
    }

    confirmDistinct();
}
 
Example 11
Source File: SkewedJoinConverter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns an iterator that turns each joined pair from {@code in} into one flat tuple: the left
 * tuple's fields followed by the right tuple's fields.
 *
 * @return an iterator over the concatenated tuples; an element is {@code null} when building it
 *         threw an exception
 */
public Iterator<Tuple> iterator() {
    return new IteratorTransform<Tuple2<Object, Tuple2<Tuple, Tuple>>, Tuple>(
            in) {
        @Override
        protected Tuple transform(
                Tuple2<Object, Tuple2<Tuple, Tuple>> next) {
            try {
                Tuple left = next._2._1;
                Tuple right = next._2._2;
                int leftWidth = left.size();
                int rightWidth = right.size();

                Tuple joined = TupleFactory.getInstance().newTuple(leftWidth + rightWidth);

                // Left fields fill the front; right fields follow at an offset of leftWidth.
                for (int col = 0; col < leftWidth; col++) {
                    joined.set(col, left.get(col));
                }
                for (int col = 0; col < rightWidth; col++) {
                    joined.set(leftWidth + col, right.get(col));
                }

                System.out.println("MJC: Result = "
                        + joined.toDelimitedString(" "));
                return joined;
            } catch (Exception e) {
                // NOTE(review): the failure is only printed and the element becomes null;
                // confirm that callers expect this best-effort behaviour.
                System.out.println(e);
                return null;
            }
        }
    };
}
 
Example 12
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that tuples with identical contents share a hash code, that tuples differing in size
 * or in data do not, and that {@code hashCode()} can be called on a tuple from
 * {@code giveMeOneOfEach()}.
 */
@Test
public void testTupleHashCode() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    // The boxed-type constructors are deprecated; valueOf yields equal values.
    Tuple t1 = tf.newTuple(2);
    t1.set(0, new DataByteArray("hello world"));
    t1.set(1, Integer.valueOf(1));

    Tuple t2 = tf.newTuple(2);
    t2.set(0, new DataByteArray("hello world"));
    t2.set(1, Integer.valueOf(1));

    assertEquals("same data", t1.hashCode(), t2.hashCode());

    Tuple t3 = tf.newTuple(3);
    t3.set(0, new DataByteArray("hello world"));
    t3.set(1, Integer.valueOf(1));
    t3.set(2, Long.valueOf(4L));
    assertFalse("different size", t1.hashCode() == t3.hashCode());

    Tuple t4 = tf.newTuple(2);
    t4.set(0, new DataByteArray("hello world"));
    t4.set(1, Integer.valueOf(2));
    assertFalse("same size, different data", t1.hashCode() == t4.hashCode());

    // Make sure we can take the hash code of all the types.
    Tuple t5 = giveMeOneOfEach();
    t5.hashCode();
}
 
Example 13
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Passes {@code BagToString} a tuple whose first field is a tuple rather than a bag and expects
 * an {@code ExecException}.
 */
@Test(expected=org.apache.pig.backend.executionengine.ExecException.class)
public void testInvalidInputForBagToStringUDF() throws Exception {
	TupleFactory tupleFactory = TupleFactory.getInstance();

	// Field 0 deliberately holds a tuple where a bag is expected.
	Tuple badInput = tupleFactory.newTuple(1);
	badInput.set(0, tupleFactory.newTuple());

	// exec is expected to throw because the input is of type Tuple, not DataBag.
	new BagToString().exec(badInput);
}
 
Example 14
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Passes {@code BagToString} a bag in which one tuple contains a nested tuple, with "_" as the
 * delimiter, and expects {@code "a_5_c_6_(d,7)"}.
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
	TupleFactory tupleFactory = TupleFactory.getInstance();

	Tuple flat = tupleFactory.newTuple(2);
	flat.set(0, "a");
	flat.set(1, 5);

	Tuple inner = tupleFactory.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);

	// Third field carries the nested tuple.
	Tuple withNested = tupleFactory.newTuple(3);
	withNested.set(0, "c");
	withNested.set(1, 6);
	withNested.set(2, inner);

	DataBag inputBag = BagFactory.getInstance().newDefaultBag();
	inputBag.add(flat);
	inputBag.add(withNested);

	// UDF input layout: field 0 is the bag, field 1 is the delimiter.
	Tuple udfInput = tupleFactory.newTuple(2);
	udfInput.set(0, inputBag);
	udfInput.set(1, "_");

	String joined = new BagToString().exec(udfInput);
	assertEquals("a_5_c_6_(d,7)", joined);
}
 
Example 15
Source File: TestProject.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * In overloaded mode, projects successive adjacent column pairs of {@code tRandomAndNull} with
 * {@code getNextTuple()} until {@code STATUS_EOP} is returned, checking each result against a
 * tuple built from the same two fields. It then switches overloading off and checks that
 * projecting column 8 returns that field.
 */
@Test
public void testGetNextTupleMultipleProjectionsWithNull() throws IOException, ExecException {
    t = tRandomAndNull;
    proj.attachInput(t);
    proj.setOverloaded(true);

    TupleFactory factory = TupleFactory.getInstance();
    ArrayList<Integer> columns = new ArrayList<Integer>();

    for (int first = 0; ; first++) {
        columns.add(first);
        columns.add(first + 1);
        proj.setColumns(columns);
        res = proj.getNextTuple();

        // End-of-processing status is the only exit from this loop.
        if (res.returnStatus == POStatus.STATUS_EOP) {
            break;
        }

        ArrayList<Object> expectedFields = new ArrayList<Object>();
        expectedFields.add(t.get(first));
        expectedFields.add(t.get(first + 1));
        Tuple expected = factory.newTuple(expectedFields);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expected, res.result);
        columns.clear();
    }

    // Single-column projection with overloading switched off.
    proj.attachInput(t);
    proj.setColumn(8);
    proj.setOverloaded(false);
    res = proj.getNextTuple();
    assertEquals(POStatus.STATUS_OK, res.returnStatus);
    assertEquals(t.get(8), res.result);
}
 
Example 16
Source File: ExampleEasyCubeAggregator.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the aggregator's sum and sum of squares in the output tuple at {@code sumIndex} and
 * {@code sumSqIndex}.
 *
 * @param reUsedOutput a tuple to write into, or {@code null} to allocate a new 2-field tuple
 * @param aggregationBuffer the buffer to read from; it is cast to {@code myAggregator}
 * @return the tuple that was written to
 */
@Override
public Object output(Object reUsedOutput, AggregationBuffer aggregationBuffer) throws ExecException
{
    Tuple resultTuple = (Tuple) reUsedOutput;
    if (resultTuple == null)
    {
        resultTuple = TupleFactory.getInstance().newTuple(2);
    }

    myAggregator aggregator = (myAggregator) aggregationBuffer;
    resultTuple.set(sumIndex, aggregator.getSum());
    resultTuple.set(sumSqIndex, aggregator.getSumSq());
    return resultTuple;
}
 
Example 17
Source File: LookUpTable.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the key of every entry in {@code offsetArr} whose sign bit is set. Each key is a new
 * tuple holding the stored tuple's columns selected by {@code comparatorIndices}.
 *
 * @return the set of key tuples
 * @throws RuntimeException wrapping any {@code ExecException} raised while building the keys
 */
@SuppressWarnings("NullableProblems")
@Override
public Set<Tuple> keySet()
{
    final int keyWidth = comparatorIndices.length;
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final Tuple scratch = newTuple();
    final Set<Tuple> result = new HashSet<Tuple>();

    try
    {
        for (int rawOffset : offsetArr)
        {
            /* Only entries with the sign bit set start a new key */
            if (rawOffset < 0)
            {
                /* Strip the flag bits, then load the stored tuple into the scratch tuple */
                final int storeOffset = rawOffset & MASK;
                store.getTuple(storeOffset, scratch);

                /* Copy just the key columns into a fresh tuple */
                final Tuple key = tupleFactory.newTuple(keyWidth);
                int column = 0;
                while (column < keyWidth)
                {
                    key.set(column, scratch.get(comparatorIndices[column]));
                    ++column;
                }
                result.add(key);
            }
        }
    }
    catch (ExecException e)
    {
        throw new RuntimeException(e);
    }
    return result;
}
 
Example 18
Source File: TestDataModel.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the tuple from {@code giveMeOneOfEach()} twice to a temporary file with
 * {@code Tuple.write}, reads both copies back with {@code Tuple.readFields}, and checks the type
 * and value of fields 0 through 9 of each copy.
 */
@Test
public void testReadWrite() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = giveMeOneOfEach();

    File file = File.createTempFile("Tuple", "put");
    try {
        FileOutputStream fos = new FileOutputStream(file);
        try {
            DataOutput out = new DataOutputStream(fos);
            t1.write(out);
            t1.write(out); // twice in a row on purpose
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            fos.close();
        }

        FileInputStream fis = new FileInputStream(file);
        try {
            DataInput in = new DataInputStream(fis);
            for (int i = 0; i < 2; i++) {
                Tuple after = tf.newTuple();
                after.readFields(in);

                // Field 0: nested tuple (Integer 3, Float 3.0)
                Object o = after.get(0);
                assertTrue("isa Tuple", o instanceof Tuple);
                Tuple t3 = (Tuple)o;
                o = t3.get(0);
                assertTrue("isa Integer", o instanceof Integer);
                assertEquals(Integer.valueOf(3), (Integer)o);
                o = t3.get(1);
                assertTrue("isa Float", o instanceof Float);
                assertEquals(Float.valueOf(3.0f), (Float)o);

                // Field 1: bag of two single-field tuples
                o = after.get(1);
                assertTrue("isa Bag", o instanceof DataBag);
                DataBag b = (DataBag)o;
                Iterator<Tuple> j = b.iterator();
                Tuple[] ts = new Tuple[2];
                assertTrue("first tuple in bag", j.hasNext());
                ts[0] = j.next();
                assertTrue("second tuple in bag", j.hasNext());
                ts[1] = j.next();
                o = ts[0].get(0);
                assertTrue("isa Integer", o instanceof Integer);
                assertEquals(Integer.valueOf(4), (Integer)o);
                o = ts[1].get(0);
                assertTrue("isa String", o instanceof String);
                assertEquals("mary had a little lamb", (String)o);

                // Field 2: map
                o = after.get(2);
                assertTrue("isa Map", o instanceof Map);
                Map<String, Object> m = (Map<String, Object>)o;
                assertEquals("world", (String)m.get("hello"));
                assertEquals("all", (String)m.get("goodbye"));
                assertNull(m.get("fred"));

                // Fields 3-9: scalar values
                o = after.get(3);
                assertTrue("isa Integer", o instanceof Integer);
                Integer ii = (Integer)o;
                assertEquals(Integer.valueOf(42), ii);

                o = after.get(4);
                assertTrue("isa Long", o instanceof Long);
                Long l = (Long)o;
                assertEquals(Long.valueOf(5000000000L), l);

                o = after.get(5);
                assertTrue("isa Float", o instanceof Float);
                Float f = (Float)o;
                // Same double-to-float narrowing that the Float(double) constructor performs.
                assertEquals(Float.valueOf((float) 3.141592654), f);

                o = after.get(6);
                assertTrue("isa Double", o instanceof Double);
                Double d = (Double)o;
                assertEquals(Double.valueOf(2.99792458e8), d);

                o = after.get(7);
                assertTrue("isa Boolean", o instanceof Boolean);
                Boolean bool = (Boolean)o;
                assertTrue(bool);

                o = after.get(8);
                assertTrue("isa DataByteArray", o instanceof DataByteArray);
                DataByteArray ba = (DataByteArray)o;
                assertEquals(new DataByteArray("hello"), ba);

                o = after.get(9);
                assertTrue("isa String", o instanceof String);
                String s = (String)o;
                assertEquals("goodbye", s);
            }
        } finally {
            // The original never closed the input stream.
            fis.close();
        }
    } finally {
        // Remove the temp file even when an assertion or I/O call fails.
        file.delete();
    }
}
 
Example 19
Source File: TestDataModel.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Compares the two-field tuple {@code (bbb, bbb)} against tuples that differ in the first
 * field, the second field, or both, and checks the sign of {@code compareTo} in each case.
 */
@Test
public void testMultiFieldTupleCompareTo() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple();
    Tuple t2 = tf.newTuple();

    t1.append(new DataByteArray("bbb"));
    t1.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("bbb"));

    assertEquals("same data equal", 0, t1.compareTo(t2));

    // Both columns lesser
    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // Both columns greater
    t2 = tf.newTuple();
    t2.append(new DataByteArray("ddd"));
    t2.append(new DataByteArray("ddd"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    // First column same, second lesser
    t2 = tf.newTuple();
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column same, second greater
    t2 = tf.newTuple();
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("ccc"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    // First column less, second same
    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("bbb"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column greater, second same
    t2 = tf.newTuple();
    t2.append(new DataByteArray("ccc"));
    t2.append(new DataByteArray("bbb"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    // First column less, second greater
    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("ccc"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column greater, second lesser
    t2 = tf.newTuple();
    t2.append(new DataByteArray("ccc"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));
}
 
Example 20
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an equality expression between a tuple constant cast to a four-field bytearray tuple
 * schema and a second tuple constant, runs {@code TypeCheckingExpVisitor} over the plan, and
 * checks that the expression is typed BOOLEAN with TUPLE operands on both sides.
 */
@Test
public void testExpressionTypeChecking8() throws Throwable {
    TupleFactory factory = TupleFactory.getInstance();

    // Inner constant: (10, 3, 7, 17)
    ArrayList<Object> innerValues = new ArrayList<Object>();
    for (int value : new int[] {10, 3, 7, 17}) {
        innerValues.add(value);
    }
    Tuple innerTuple = factory.newTuple(innerValues);

    // Outer constant: ("World", 42, innerTuple)
    ArrayList<Object> outerValues = new ArrayList<Object>();
    outerValues.add("World");
    outerValues.add(42);
    outerValues.add(innerTuple);
    Tuple tuple = factory.newTuple(outerValues);

    Schema.FieldSchema stringFs = new Schema.FieldSchema(null, DataType.CHARARRAY);
    Schema.FieldSchema intFs = new Schema.FieldSchema(null, DataType.INTEGER);
    Schema.FieldSchema byteArrayFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);

    // One integer field schema per inner value.
    ArrayList<Schema.FieldSchema> innerFss = new ArrayList<Schema.FieldSchema>();
    while (innerFss.size() < innerValues.size()) {
        innerFss.add(intFs);
    }
    Schema innerTupleSchema = new Schema(innerFss);

    ArrayList<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>();
    fss.add(stringFs);
    fss.add(intFs);
    fss.add(new Schema.FieldSchema(null, innerTupleSchema, DataType.TUPLE));
    Schema tupleSchema = new Schema(fss);

    // Cast target: a tuple of four bytearray fields.
    ArrayList<Schema.FieldSchema> castFss = new ArrayList<Schema.FieldSchema>();
    while (castFss.size() < 4) {
        castFss.add(byteArrayFs);
    }
    Schema castSchema = new Schema(castFss);

    LogicalExpressionPlan plan = new LogicalExpressionPlan();
    ConstantExpression constant1 = new ConstantExpression(plan, innerTuple);
    ConstantExpression constant2 = new ConstantExpression(plan, tuple);
    CastExpression cast1 = new CastExpression(plan, constant1,
            org.apache.pig.newplan.logical.Util.translateFieldSchema(new FieldSchema(null, castSchema, DataType.TUPLE)));
    EqualExpression equal1 = new EqualExpression(plan, cast1, constant2);

    CompilationMessageCollector collector = new CompilationMessageCollector();
    LogicalRelationalOperator dummyRelOp = createDummyRelOpWithAlias();
    TypeCheckingExpVisitor expTypeChecker = new TypeCheckingExpVisitor(plan, collector, dummyRelOp);
    expTypeChecker.visit();
    printMessageCollector(collector);

    if (collector.hasError()) {
        throw new Exception("Error during type checking");
    }

    assertEquals(DataType.BOOLEAN, equal1.getType());
    assertEquals(DataType.TUPLE, equal1.getRhs().getType());
    assertEquals(DataType.TUPLE, equal1.getLhs().getType());
}