Java Code Examples for org.apache.pig.data.TupleFactory#getInstance()

The following examples show how to use org.apache.pig.data.TupleFactory#getInstance(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BagTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Drives {@code TupleFromBag} through its accumulator interface in three rounds,
 * calling {@code cleanup()} after each one:
 * <ol>
 *   <li>index 0 over bags built from 4, 9 and 16 - expects {@code newTuple(4)};</li>
 *   <li>index 1 over bags built from 11, 17 and 5 - expects {@code newTuple(17)};</li>
 *   <li>index 2 on an empty bag - expects the supplied default tuple.</li>
 * </ol>
 */
@Test
public void tupleFromBagAccumulateTest() throws Exception
{
  final TupleFactory tupleFactory = TupleFactory.getInstance();
  final BagFactory bagFactory = BagFactory.getInstance();

  final TupleFromBag op = new TupleFromBag();
  final Tuple defaultValue = tupleFactory.newTuple(1000);

  // Round 1: every accumulated input asks for index 0.
  for (int n : new int[] {4, 9, 16})
  {
    op.accumulate(tupleFactory.newTuple(
        Arrays.asList(bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(n))), 0, defaultValue)));
  }
  assertEquals(op.getValue(), tupleFactory.newTuple(4));
  op.cleanup();

  // Round 2: every accumulated input asks for index 1.
  for (int n : new int[] {11, 17, 5})
  {
    op.accumulate(tupleFactory.newTuple(
        Arrays.asList(bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(n))), 1, defaultValue)));
  }
  assertEquals(op.getValue(), tupleFactory.newTuple(17));
  op.cleanup();

  // Round 3: an empty bag with index 2 must yield the default value.
  op.accumulate(tupleFactory.newTuple(Arrays.asList(bagFactory.newDefaultBag(), 2, defaultValue)));
  assertEquals(op.getValue(), defaultValue);
  op.cleanup();
}
 
Example 2
Source File: BagTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Drives {@code FirstTupleFromBag} through its accumulator interface in three rounds,
 * calling {@code cleanup()} after each one:
 * <ol>
 *   <li>bags built from 4, 9 and 16 - expects {@code newTuple(4)};</li>
 *   <li>bags built from 11, 17 and 5 - expects {@code newTuple(11)};</li>
 *   <li>a single empty bag - expects the supplied default tuple.</li>
 * </ol>
 */
@Test
public void firstTupleFromBagAccumulateTest() throws Exception
{
  final TupleFactory tupleFactory = TupleFactory.getInstance();
  final BagFactory bagFactory = BagFactory.getInstance();

  final FirstTupleFromBag op = new FirstTupleFromBag();
  final Tuple defaultValue = tupleFactory.newTuple(1000);

  // Round 1
  for (int n : new int[] {4, 9, 16})
  {
    op.accumulate(tupleFactory.newTuple(
        Arrays.asList(bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(n))), defaultValue)));
  }
  assertEquals(op.getValue(), tupleFactory.newTuple(4));
  op.cleanup();

  // Round 2
  for (int n : new int[] {11, 17, 5})
  {
    op.accumulate(tupleFactory.newTuple(
        Arrays.asList(bagFactory.newDefaultBag(Arrays.asList(tupleFactory.newTuple(n))), defaultValue)));
  }
  assertEquals(op.getValue(), tupleFactory.newTuple(11));
  op.cleanup();

  // Round 3: an empty bag must yield the default value.
  op.accumulate(tupleFactory.newTuple(Arrays.asList(bagFactory.newDefaultBag(), defaultValue)));
  assertEquals(op.getValue(), defaultValue);
  op.cleanup();
}
 
Example 3
Source File: TestProject.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Projects successive column pairs (j, j + 1) out of the random input tuple while the
 * projection is in "overloaded" mode, checking each result against a tuple built by hand
 * from the same two fields. The loop stops when the operator reports end-of-processing.
 * Afterwards a single-column, non-overloaded projection of column 8 is checked.
 */
@Test
public void testGetNextTupleMultipleProjections() throws IOException, ExecException {
    t = tRandom;
    proj.attachInput(t);
    proj.setOverloaded(true);

    final ArrayList<Integer> cols = new ArrayList<Integer>();

    for (int first = 0; ; ++first) {
        final int second = first + 1;

        cols.add(first);
        cols.add(second);
        proj.setColumns(cols);
        res = proj.getNextTuple();
        if (res.returnStatus == POStatus.STATUS_EOP) {
            break;
        }

        // Expected result: the two projected fields, in order, wrapped in a fresh tuple.
        final ArrayList<Object> expectedFields = new ArrayList<Object>();
        expectedFields.add(t.get(first));
        expectedFields.add(t.get(second));
        final Tuple expectedResult = TupleFactory.getInstance().newTuple(expectedFields);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expectedResult, res.result);

        // The column list is reused across iterations, so empty it for the next pair.
        cols.clear();
    }

    // Single-column projection in non-overloaded mode.
    proj.attachInput(t);
    proj.setColumn(8);
    proj.setOverloaded(false);
    res = proj.getNextTuple();
    assertEquals(POStatus.STATUS_OK, res.returnStatus);
    assertEquals(t.get(8), res.result);
}
 
Example 4
Source File: TestEvalPipelineLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Ignores the input and returns a default bag holding three tuples, each created with
 * {@code newTuple("a")}.
 *
 * @param input unused
 * @return a new bag with three tuples
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final DataBag result = BagFactory.getInstance().newDefaultBag();
    for (int copy = 0; copy < 3; copy++) {
        result.add(tupleFactory.newTuple("a"));
    }
    return result;
}
 
Example 5
Source File: TestPODistinct.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code input} with {@code MAX_SAMPLES} tuples, each holding one random int drawn
 * from {@code r.nextInt(MAX_VALUE)}, then delegates the check to {@code confirmDistinct()}.
 */
@Test
public void testPODistictWithInt() throws ExecException {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    input = BagFactory.getInstance().newDefaultBag();

    int added = 0;
    while (added < MAX_SAMPLES) {
        final Tuple sample = tupleFactory.newTuple();
        sample.append(r.nextInt(MAX_VALUE));
        input.add(sample);
        added++;
    }

    confirmDistinct();
}
 
Example 6
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a tuple created with {@code newTuple(11)} whose fields 0-9 are populated with a
 * nested tuple, a bag, a map, an Integer, a Long, a Float, a Double, a Boolean, a
 * {@code DataByteArray} and a String, in that order. Field 10 is never set here, so it
 * keeps whatever {@code newTuple(11)} initialises it to.
 *
 * <p>Boxed values are obtained through {@code valueOf} / constants rather than the
 * deprecated wrapper constructors, and string literals are used directly instead of
 * {@code new String(...)}; the resulting values are {@code equals}-identical.
 *
 * @return the populated tuple
 * @throws Exception if any tuple field cannot be set
 */
private Tuple giveMeOneOfEach() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(11);
    Tuple t2 = tf.newTuple(2);

    t2.set(0, Integer.valueOf(3));
    t2.set(1, Float.valueOf(3.0f));

    DataBag bag = BagFactory.getInstance().newDefaultBag();
    // Keep the argument typed as Integer: a bare int literal would select the
    // newTuple(int) overload instead of newTuple(Object).
    bag.add(tf.newTuple(Integer.valueOf(4)));
    bag.add(tf.newTuple("mary had a little lamb"));

    Map<String, Object> map = new LinkedHashMap<String, Object>(2);
    map.put("hello", "world");
    map.put("goodbye", "all");

    t1.set(0, t2);
    t1.set(1, bag);
    t1.set(2, map);
    t1.set(3, Integer.valueOf(42));
    t1.set(4, Long.valueOf(5000000000L));
    // Narrow from the double literal, exactly as the old Float(double) constructor did.
    t1.set(5, Float.valueOf((float) 3.141592654));
    t1.set(6, Double.valueOf(2.99792458e8));
    t1.set(7, Boolean.TRUE);
    t1.set(8, new DataByteArray("hello"));
    t1.set(9, "goodbye");

    return t1;
}
 
Example 7
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Ignores the input and returns a fixed map containing one entry per value kind:
 * a String, an Integer, a Long, a Float, a Double, a {@code DataByteArray}, a nested map,
 * a tuple and a bag that contains that same tuple.
 *
 * <p>Boxed values are obtained through {@code valueOf} rather than the deprecated wrapper
 * constructors, and string literals are used directly instead of {@code new String(...)};
 * the resulting values are {@code equals}-identical.
 *
 * @param input unused
 * @return the populated map
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    TupleFactory tupleFactory = TupleFactory.getInstance();
    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = tupleFactory.newTuple(objList);

    BagFactory bagFactory = BagFactory.getInstance();
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    // getBytes() still uses the platform default charset, as before.
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example 8
Source File: POFRJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the join operator: stores the per-input plan lists, key types and replicated
 * file specs, allocates one replicate map / local-rearrange / constant-expression slot per
 * entry of {@code ppLists}, builds the join plans, and wraps {@code nullTuple} in a
 * non-spillable bag.
 *
 * <p>When {@code inputSchemas} or {@code keySchemas} is {@code null}, an array of
 * {@code replFiles.length} unset entries is used instead (length 0 if {@code replFiles}
 * is itself {@code null}).
 *
 * @throws ExecException declared by this constructor
 */
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp,
        List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes,
        FileSpec[] replFiles, int fragment, boolean isLeftOuter,
        Tuple nullTuple,
        Schema[] inputSchemas,
        Schema[] keySchemas)
        throws ExecException {
    super(k, rp, inp);

    final int inputCount = ppLists.size();

    // State that must be in place before createJoinPlans runs.
    phyPlanLists = ppLists;
    this.fragment = fragment;
    this.keyTypes = keyTypes;
    this.replFiles = replFiles;
    replicates = new TupleToMapKey[inputCount];
    LRs = new POLocalRearrange[inputCount];
    constExps = new ConstantExpression[inputCount];
    createJoinPlans(k);

    processingPlan = false;
    mTupleFactory = TupleFactory.getInstance();

    // The bag keeps a mutable list holding just the supplied null tuple.
    final List<Tuple> nullTuples = new ArrayList<Tuple>();
    nullTuples.add(nullTuple);
    nullBag = new NonSpillableDataBag(nullTuples);

    this.isLeftOuterJoin = isLeftOuter;

    // Fall back to arrays sized by the replicated-file count when no schemas are given.
    final int replCount = (replFiles == null) ? 0 : replFiles.length;
    this.inputSchemas = (inputSchemas != null) ? inputSchemas : new Schema[replCount];
    this.keySchemas = (keySchemas != null) ? keySchemas : new Schema[replCount];
}
 
Example 9
Source File: TestDataModel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Smoke test: loads a 5x5 and a 2x2 int matrix into tuples via
 * {@code Util.loadNestTuple}. There are no assertions; the test passes as long as
 * nothing throws.
 */
@Test
public void testNestTuple() throws Exception {
    final TupleFactory factory = TupleFactory.getInstance();

    final int[][] fiveByFive = {
        { 1, 2, 3, 4, 5 },
        { 1, 2, 3, 4, 5 },
        { 1, 2, 3, 4, 5 },
        { 1, 2, 3, 4, 5 },
        { 1, 2, 3, 4, 5 }
    };
    final int[][] twoByTwo = {
        { 1, 2 },
        { 1, 2 }
    };

    Tuple fiveRowTuple = Util.loadNestTuple(factory.newTuple(fiveByFive.length), fiveByFive);

    // Starts out as an empty tuple and is then replaced by the loaded one.
    Tuple twoRowTuple = factory.newTuple();
    twoRowTuple = Util.loadNestTuple(factory.newTuple(twoByTwo.length), twoByTwo);
}
 
Example 10
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a one-field tuple whose only field is itself a tuple (not a bag) to
 * {@code BagToTuple.exec} and expects an {@code ExecException}.
 */
@Test(expected=org.apache.pig.backend.executionengine.ExecException.class)
public void testInvalidInputToBagToTupleUDF() throws Exception {
	final TupleFactory tupleFactory = TupleFactory.getInstance();

	// The single input field holds a tuple where the UDF wants a bag.
	final Tuple invalidInput = tupleFactory.newTuple(1);
	invalidInput.set(0, tupleFactory.newTuple());

	// Must throw: the field is of type Tuple, not DataBag.
	new BagToTuple().exec(invalidInput);
}
 
Example 11
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code BagToString} with delimiter {@code "_"} over a bag of two tuples,
 * {@code ("a", 5)} and {@code ("c", 6, ("d", 7))}, and expects the string
 * {@code "a_5_c_6_(d,7)"}.
 */
@Test
public void testNestedTupleForBagToStringUDF() throws Exception {
	final TupleFactory tupleFactory = TupleFactory.getInstance();
	final BagFactory bagFactory = BagFactory.getInstance();

	// Innermost tuple first, then the tuple that embeds it.
	final Tuple inner = tupleFactory.newTuple(2);
	inner.set(0, "d");
	inner.set(1, 7);

	final Tuple withNested = tupleFactory.newTuple(3);
	withNested.set(0, "c");
	withNested.set(1, 6);
	withNested.set(2, inner);

	final Tuple flat = tupleFactory.newTuple(2);
	flat.set(0, "a");
	flat.set(1, 5);

	// The flat tuple is added before the nested one.
	final DataBag bag = bagFactory.newDefaultBag();
	bag.add(flat);
	bag.add(withNested);

	// UDF input: (bag, delimiter)
	final Tuple udfInput = tupleFactory.newTuple(2);
	udfInput.set(0, bag);
	udfInput.set(1, "_");

	final String result = new BagToString().exec(udfInput);

	assertEquals("a_5_c_6_(d,7)", result);
}
 
Example 12
Source File: PigRelSqlUdfs.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Implementation for PIG_BAG functions. Builds a Pig DataBag from
 * the first input and returns it wrapped in a Tuple.
 *
 * <p>The first element supplies the bag's rows: if it is a {@link List}, each item
 * becomes a one-field tuple in the bag; otherwise the element itself becomes the single
 * row. Any remaining elements are appended, unchanged, after the bag in the returned
 * tuple.
 *
 * <p>If {@code elements} is {@code null} or empty (for example a call with no
 * arguments), the result is a tuple whose only field is an empty bag.
 *
 * @param elements Input whose first item contains the rows of the bag; may be
 *                 {@code null} or empty
 * @return Pig Tuple whose first field is the constructed DataBag, followed by
 *         {@code elements[1..]}
 */
public static Tuple buildBag(Object... elements) {
  final TupleFactory tupleFactory = TupleFactory.getInstance();
  final BagFactory bagFactory = BagFactory.getInstance();
  // A varargs call with no arguments passes an empty (non-null) array, so a null check
  // alone is not enough to make elements[0] safe.
  final boolean hasElements = elements != null && elements.length > 0;

  // Convert each row into a Tuple
  List<Tuple> tupleList = new ArrayList<>();
  if (hasElements) {
    // The first input contains a list of rows for the bag
    final List<?> bag = (elements[0] instanceof List)
        ? (List<?>) elements[0]
        : Collections.singletonList(elements[0]);
    for (Object row : bag) {
      tupleList.add(tupleFactory.newTuple(Arrays.asList(row)));
    }
  }

  // Then build a bag from the tuple list
  DataBag resultBag = bagFactory.newDefaultBag(tupleList);

  // The returned result is a new Tuple with the newly constructed DataBag
  // as the first item.
  List<Object> finalTuple = new ArrayList<>();
  finalTuple.add(resultBag);

  if (hasElements) {
    // Add the remaining elements from the input
    for (int i = 1; i < elements.length; i++) {
      finalTuple.add(elements[i]);
    }
  }

  return tupleFactory.newTuple(finalTuple);
}
 
Example 13
Source File: SampleEasyCubeAggregatorAsUDAF.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Writes the aggregator's sum and sum-of-squares into the fields {@code sumIndex} and
 * {@code sumSqIndex} of the output tuple. The tuple passed in is reused when it is not
 * {@code null}; otherwise a new two-field tuple is created.
 *
 * @throws ExecException if a field of the output tuple cannot be set
 * @see com.linkedin.cubert.operator.cube.EasyCubeAggregator#output(org.apache.pig.data.Tuple,
 *      com.linkedin.cubert.operator.AggregationBuffer)
 */
@Override
public Object output(Object reUsedOutput, AggregationBuffer aggregationBuffer) throws ExecException
{
    final Tuple resultTuple = (reUsedOutput == null)
            ? TupleFactory.getInstance().newTuple(2)
            : (Tuple) reUsedOutput;

    final myAggregator aggregator = (myAggregator) aggregationBuffer;
    resultTuple.set(sumIndex, aggregator.getSum());
    resultTuple.set(sumSqIndex, aggregator.getSumSq());
    return resultTuple;
}
 
Example 14
Source File: ExampleEasyCubeAggregator.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the aggregator's sum and sum-of-squares into the fields {@code sumIndex} and
 * {@code sumSqIndex} of the output tuple.
 *
 * @param reUsedOutput      tuple to write into, or {@code null} to have a new two-field
 *                          tuple created
 * @param aggregationBuffer buffer that is cast to {@code myAggregator} to read the values
 * @return the tuple that was written to
 * @throws ExecException if a field of the output tuple cannot be set
 */
@Override
public Object output(Object reUsedOutput, AggregationBuffer aggregationBuffer) throws ExecException
{
    Tuple target = (Tuple) reUsedOutput;
    if (target == null)
    {
        target = TupleFactory.getInstance().newTuple(2);
    }

    final myAggregator source = (myAggregator) aggregationBuffer;
    target.set(sumIndex, source.getSum());
    target.set(sumSqIndex, source.getSumSq());
    return target;
}
 
Example 15
Source File: TestRegex.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code RegexExtractAll} with three inputs sharing one pattern:
 * a matching string (expects the two captured groups "this" and "match"),
 * a non-matching string (expects {@code null}), and a {@code null} string
 * (expects {@code null}).
 */
@Test
public void testRegexExtractAll() throws IOException {
    String matchRegex = "^(.+)\\b\\s+is a\\s+\\b(.+)$";
    TupleFactory tupleFactory = TupleFactory.getInstance();
    Tuple t1 = tupleFactory.newTuple(2);
    t1.set(0,"this is a match");
    t1.set(1, matchRegex);

    Tuple t2 = tupleFactory.newTuple(2);
    t2.set(0, "no match");
    t2.set(1, matchRegex);

    Tuple t3 = tupleFactory.newTuple(2);
    t3.set(0, null);
    t3.set(1, matchRegex);

    RegexExtractAll func = new RegexExtractAll();
    Tuple r = func.exec(t1);
    // Expected value goes first so a failure reports "expected 2 but was <size>".
    assertEquals(2, r.size());
    assertEquals("this", r.get(0));
    assertEquals("match", r.get(1));

    r = func.exec(t2);
    assertTrue(r==null);

    r = func.exec(t3);
    assertTrue(r==null);
}
 
Example 16
Source File: BagToTuple.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Flattens the bag held in the single input field into one tuple: the fields of every
 * non-null tuple in the bag are copied, in iteration order, into consecutive positions of
 * the output tuple.
 *
 * @param inputTuple a tuple with exactly one field, expected to be a {@code DataBag}
 * @return the flattened tuple, or {@code null} when the input field is {@code null}
 * @throws IOException as an {@code ExecException} when the input does not have exactly
 *         one field, when that field is not a {@code DataBag}, or when anything fails
 *         while sizing or filling the output tuple
 */
@Override
public Tuple exec(Tuple inputTuple) throws IOException {

	if (inputTuple.size() != 1) {
		throw new ExecException("Expecting 1 input, found " + inputTuple.size(), PigException.INPUT);
	}

	if (inputTuple.get(0) == null) {
		return null;
	}

	if (!(inputTuple.get(0) instanceof DataBag)) {
		throw new ExecException("Usage BagToTuple(DataBag)", PigException.INPUT);
	}

	DataBag inputBag = (DataBag) (inputTuple.get(0));
	try {
		long outputTupleSize = getOuputTupleSize(inputBag);

		// TupleFactory.newTuple(int size) can only support up to Integer.MAX_VALUE
		if (outputTupleSize > Integer.MAX_VALUE) {
			throw new ExecException("Input bag is too large", 105, PigException.INPUT);
		}

		TupleFactory tupleFactory = TupleFactory.getInstance();
		Tuple outputTuple = tupleFactory.newTuple((int) outputTupleSize);

		int fieldNum = 0;
		for (Tuple t : inputBag) {
			if (t != null) {
				for (int i = 0; i < t.size(); i++) {
					outputTuple.set(fieldNum++, t.get(i));
				}
			}
		}
		return outputTuple;
	} catch (Exception e) {
		// NOTE: this also catches the "Input bag is too large" exception thrown above and
		// re-wraps it; the original remains available as the cause.
		String msg = "Encountered error while flattening a bag to tuple in "
				+ this.getClass().getSimpleName();
		throw new ExecException(msg, PigException.BUG, e);
	}
}
 
Example 17
Source File: LogicalPlanBuilder.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps the given list in a tuple obtained from the shared {@code TupleFactory}.
 *
 * @param objList the values to pass to {@code TupleFactory.newTuple}
 * @return the tuple created by the factory
 */
static Tuple buildTuple(List<Object> objList) {
    return TupleFactory.getInstance().newTuple(objList);
}
 
Example 18
Source File: POMergeJoin.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Deserialization hook: restores the default-serialized fields, then re-acquires the
 * tuple factory, which is not taken from the stream.
 */
private void readObject(ObjectInputStream is) throws IOException, ClassNotFoundException, ExecException{
    // Default field restoration must come first.
    is.defaultReadObject();

    this.mTupleFactory = TupleFactory.getInstance();
}
 
Example 19
Source File: PigPerformanceLoader.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the loader: passes a delimiter string to the superclass constructor and stores
 * the bag and tuple factories returned by their {@code getInstance()} methods.
 */
public PigPerformanceLoader() {
    // Assume ^A as a delimiter
    // NOTE(review): the literal below appears empty in this copy; given the comment above,
    // it presumably held a ^A (\u0001) control character - confirm it was not lost.
    super("");
    bagFactory = BagFactory.getInstance();
    tupleFactory = TupleFactory.getInstance();
}
 
Example 20
Source File: TestPOUserFunc.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the AVG function through {@code POUserFunc} in its three algebraic stages
 * (initial, intermediate, final) and checks the result of each stage.
 *
 * @param input                 values loaded into the input tuple via {@code Util.loadNestTuple}
 * @param initialExpectedSum    expected field 0 of the initial-stage output
 * @param initialExpectedCount  expected field 1 of the initial-stage output
 * @param intermedExpectedSum   expected field 0 of the intermediate-stage output
 * @param intermedExpectedCount expected field 1 of the intermediate-stage output
 * @param expectedAvg           expected result of the final stage
 */
public void algebraicAVG(
                Integer[] input
              , Double initialExpectedSum, Long initialExpectedCount
              , Double intermedExpectedSum, Long intermedExpectedCount
              , Double expectedAvg
        ) throws IOException, ExecException {

               // generate data
	// Stage selectors passed to setAlgebraicFunction below.
	byte INIT = 0;
	byte INTERMED = 1;
	byte FINAL = 2;
	Tuple tup1 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1),
			input);
	// NOTE(review): tup2 is built but never attached to an operator in this method.
	Tuple tup2 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1),
			input);
	// System.out.println("Input = " + tup1);
	String funcSpec = AVG.class.getName() + "()";

	POUserFunc po = new POUserFunc(new OperatorKey("", r.nextLong()), -1,
			null, new FuncSpec(funcSpec));

               //************ Initial Calculations ******************
	TupleFactory tf = TupleFactory.getInstance();
	po.setAlgebraicFunction(INIT);
	po.attachInput(tup1);
	// NOTE(review): t is never read in this method.
	Tuple t = null;
	Result res = po.getNextTuple();
	// Both "initial" outputs are taken from the same Result, so they refer to the same
	// object whenever the status is OK.
	Tuple outputInitial1 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
			: null;
	Tuple outputInitial2 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
			: null;
	System.out.println(outputInitial1 + " " + outputInitial2);
	assertEquals(outputInitial1, outputInitial2);
	// Initial-stage output is read as (sum, count).
	Double sum = (Double) outputInitial1.get(0);
	Long count = (Long) outputInitial1.get(1);
	assertEquals(initialExpectedSum, sum);
	assertEquals(initialExpectedCount, count);

               //************ Intermediate Data and Calculations ******************
	// Feed the two initial outputs, wrapped in a bag inside a one-field tuple, to a fresh
	// operator configured for the intermediate stage.
	DataBag bag = BagFactory.getInstance().newDefaultBag();
	bag.add(outputInitial1);
	bag.add(outputInitial2);
	Tuple outputInitial = tf.newTuple();
	outputInitial.append(bag);
	// Tuple outputIntermed = intermed.exec(outputInitial);
	po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null,
			new FuncSpec(funcSpec));
	po.setAlgebraicFunction(INTERMED);
	po.attachInput(outputInitial);
	res = po.getNextTuple();
	Tuple outputIntermed = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
			: null;

	sum = (Double) outputIntermed.get(0);
	count = (Long) outputIntermed.get(1);
	assertEquals(intermedExpectedSum, sum);
	assertEquals(intermedExpectedCount, count);
	System.out.println(outputIntermed);

               //************ Final Calculations ******************
	// The final stage is given the same tuple as the intermediate stage (outputInitial),
	// not the intermediate stage's output.
	po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null,
			new FuncSpec(funcSpec));
	po.setAlgebraicFunction(FINAL);
	po.attachInput(outputInitial);
	res = po.getNextTuple();
	Double output = (res.returnStatus == POStatus.STATUS_OK) ? (Double) res.result
			: null;
	// Double output = fin.exec(outputInitial);
	assertEquals((Double)expectedAvg, output);
	// System.out.println("output = " + output);

}