Java Code Examples for org.apache.pig.data.Tuple#append()

The following examples show how to use org.apache.pig.data.Tuple#append(). Each snippet is taken from an open-source project; the source file, project, and license are noted above each example.
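
Before the individual examples, here is a minimal, illustrative sketch of the pattern they all share: obtain a Tuple from TupleFactory and grow it one field at a time with append(). The class name TupleAppendSketch and the field values are hypothetical, and the snippet assumes Pig's runtime jar is on the classpath.

import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class TupleAppendSketch {
    public static void main(String[] args) throws Exception {
        // newTuple() creates an empty tuple; each append() adds one field at the end.
        Tuple t = TupleFactory.getInstance().newTuple();
        t.append(42);                             // autoboxed Integer field
        t.append("hello");                        // chararray field
        t.append(new DataByteArray("raw bytes")); // bytearray field
        // size() reflects the appended fields; toString() prints something like (42,hello,raw bytes)
        System.out.println(t.size() + " fields: " + t);
    }
}

Most of the examples below follow this same newTuple()-then-append() sequence, differing only in where the appended values come from.
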
Example 1
Source File: TestForEachStar.java    From spork with Apache License 2.0
@Test
public void testForeachStarSchemaUnkown() throws IOException, ParserException{
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INPUT_FILE + "' ;"
        + "f1 = foreach l1 generate * ;"
    ; 
    Util.registerMultiLineQuery(pig, query);
    pig.explain("f1",System.out);
    Iterator<Tuple> it = pig.openIterator("f1");

    Tuple expectedResCharArray = (Tuple)Util.getPigConstant("('one','two')");
    Tuple expectedRes = TupleFactory.getInstance().newTuple();
    for(Object field :  expectedResCharArray.getAll() ){
        expectedRes.append(new DataByteArray(field.toString()));
    }
    assertTrue("has output", it.hasNext());
    assertEquals(expectedRes, it.next());
}
 
Example 2
Source File: TestBinInterSedes.java    From spork with Apache License 2.0
@Test
public void testTupleWriteReadLongDiffSizes() throws IOException {
    Random r = new Random(100L);

    Tuple tuple = TupleFactory.getInstance().newTuple();

    tuple.append(new Long(0));
    tuple.append(new Long(1));
    tuple.append(new Long(-1));
    tuple.append(new Long(300));
    tuple.append(new Long(600));
    tuple.append(new Long(10000));
    tuple.append(new Long(-10000));
    tuple.append(new Long(5000000000000000000L));
    tuple.append(new Long(-5000000000000000000L));

    for (int i = 0; i < 100000; i++) {
        tuple.append(new Long(r.nextLong()));
    }

    testTupleSedes(tuple);
}
 
Example 3
Source File: TestBinInterSedes.java    From spork with Apache License 2.0
/**
 * Test serialization/deserialization (sedes) of ints of different sizes.
 * @throws IOException
 */
@Test
public void testTupleWriteReadIntDiffSizes() throws IOException {
    // create a tuple with integer columns of different sizes
    Tuple tuple = TupleFactory.getInstance().newTuple();
    tuple.append(new Integer(0));    // boolean rep
    tuple.append(new Integer(1));    // boolean rep
    tuple.append(new Integer(125));  // fits into byte
    tuple.append(new Integer(1024)); // fits into short
    tuple.append(new Integer(1024*1024*1024)); // fits into int (~2^30)

    testTupleSedes(tuple);
}
 
Example 4
Source File: AppendIndex.java    From spork with Apache License 2.0
@Override
public void accumulate(Tuple input) throws IOException {
    if (interBag == null) {
        interBag = mBagFactory.newDefaultBag();
        ct = 0;
    }
    // append a running 1-based index as the last field of each tuple in the bag
    for (Tuple t : (DataBag)input.get(0)) {
        Tuple t2 = mTupleFactory.newTupleNoCopy(t.getAll());
        t2.append(++ct);
        interBag.add(t2);
    }
}
 
Example 5
Source File: TestFilterUDF.java    From spork with Apache License 2.0
@Test
public void testFilterUDFusingDefine() throws Exception {
    File inputFile = createFile(
            new String[] {
                            "www.paulisageek.com\t4",
                            "www.yahoo.com\t12344",
                            "google.com\t1",
                            "us2.amazon.com\t4141"
            }
            );

    File filterFile = createFile(
            new String[] {
                            "12344"
            }
            );

    pigServer.registerQuery("define FILTER_CRITERION "
            + FILTERFROMFILE.class.getName()
            + "('"
            + Util.generateURI(filterFile.toString(), pigServer
                    .getPigContext()) + "');");
    pigServer.registerQuery("a = LOAD '"
            + Util.generateURI(inputFile.toString(), pigServer
                    .getPigContext())
            + "' as (url:chararray, numvisits:int);");
    pigServer.registerQuery("b = filter a by FILTER_CRITERION(numvisits);");

    Tuple expectedTuple = tf.newTuple();
    expectedTuple.append(new String("www.yahoo.com"));
    expectedTuple.append(new Integer("12344"));

    Iterator<Tuple> iter = pigServer.openIterator("b");
    while (iter.hasNext()) {
        Tuple t = iter.next();
        assertTrue(t.equals(expectedTuple));
    }
}
 
Example 6
Source File: TestLocalRearrange.java    From spork with Apache License 2.0
private void setUp1() throws PlanException, ExecException{
    lr = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0,0,db.iterator().next());
    POProject proj = GenPhyOp.exprProject();
    proj.setColumn(0);
    proj.setResultType(DataType.TUPLE);
    proj.setOverloaded(true);
    Tuple t = new DefaultTuple();
    t.append(db);
    proj.attachInput(t);
    List<PhysicalOperator> inputs = new ArrayList<PhysicalOperator>();
    inputs.add(proj);
    lr.setInputs(inputs);
}
 
Example 7
Source File: TestBinInterSedes.java    From spork with Apache License 2.0
private Tuple createTupleWithManyCols(int size) {
    // newTuple(size) pre-allocates 'size' null fields; each append() then adds another column after them
    Tuple t = TupleFactory.getInstance().newTuple(size);
    Integer col = Integer.valueOf(1);
    for(int i=0; i<size; i++){
        t.append(col);
    }
    return t;
}
 
Example 8
Source File: GenRandomData.java    From spork with Apache License 2.0
public static DataBag genFloatDataBag(Random r, int column, int row) {
    DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
    for (int i=0;i<row;i++) {
        Tuple t = TupleFactory.getInstance().newTuple();
        for (int j=0;j<column;j++) {
            t.append(r.nextFloat()*1000);
        }
        db.add(t);
    }
    return db;
}
 
Example 9
Source File: TestTuple.java    From spork with Apache License 2.0
@Test
public void testTupleSizeWithBooleans() {
    Tuple t = mTupleFactory.newTuple();
    t.append(new Boolean(true));
    t.append(new Boolean(false));
    long size = t.getMemorySize();
    assertEquals("tuple size", 120, size);
}
 
Example 10
Source File: TestHelper.java    From spork with Apache License 2.0
private static Tuple trimTuple(Tuple t){
    Tuple ret = TupleFactory.getInstance().newTuple();
    for (Object o : t.getAll()) {
        DataByteArray dba = (DataByteArray)o;
        DataByteArray nDba = new DataByteArray(dba.toString().trim().getBytes());
        ret.append(nDba);
    }
    return ret;
}
 
Example 11
Source File: TestToBagToTuple.java    From spork with Apache License 2.0
@Test
public void toTuple() throws Exception{
    ToTuple tb = new ToTuple();

    Tuple input = TupleFactory.getInstance().newTuple();
    for (int i = 0; i < 100; ++i) {
        input.append(i);
    }

    Tuple output = tb.exec(input);
    Assert.assertFalse(input == output);
    Assert.assertEquals(input, output);
}
 
Example 12
Source File: TestBinInterSedes.java    From spork with Apache License 2.0
/**
 * Create a bag containing the given number of tuples.
 * @param size number of tuples to add
 * @return the populated bag
 */
private DataBag createBag(int size) {
    Tuple innerTuple = TupleFactory.getInstance().newTuple();
    innerTuple.append(Integer.valueOf(1));
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    for(int i=0; i<size; i++){
        bag.add(innerTuple);
    }
    return bag;
}
 
Example 13
Source File: DumpStreamer.java    From spork with Apache License 2.0
@Override
public Tuple deserialize(byte[] bytes) throws IOException {
    String line = new String(bytes, utf8);
    Tuple t = DefaultTupleFactory.getInstance().newTuple();
    // strip the enclosing markers of the tuple's string form, then split the fields on commas
    String tmp = line.substring(1, line.length() - 2);
    String[] fields = tmp.split(",");
    int i;
    for (i = 0; i < fields.length; i++)
        t.append(fields[i].trim());

    return t;
}
 
Example 14
Source File: TestPODistinct.java    From spork with Apache License 2.0
@Test
public void testPODistictWithNullValues() throws ExecException {

    input = BagFactory.getInstance().newDefaultBag();
    TupleFactory tf = TupleFactory.getInstance();
    for (int i = 0; i < MAX_SAMPLES; i++) {
        Tuple t = tf.newTuple();
        t.append(null);
        input.add(t);
        // System.out.println(t);
    }

    confirmDistinct();
 }
 
Example 15
Source File: TestDataModel.java    From spork with Apache License 2.0
@Test
public void testMultiFieldTupleCompareTo() throws Exception {
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple();
    Tuple t2 = tf.newTuple();

    t1.append(new DataByteArray("bbb"));
    t1.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("bbb"));

    assertEquals("same data equal", 0, t1.compareTo(t2));

    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    t2 = tf.newTuple();
    t2.append(new DataByteArray("ddd"));
    t2.append(new DataByteArray("ddd"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));

    // First column same, second lesser
    t2 = tf.newTuple();
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column same, second greater
    t2 = tf.newTuple();
    t2.append(new DataByteArray("bbb"));
    t2.append(new DataByteArray("ccc"));
    assertTrue("greater than tuple with lesser value", 0 > t1.compareTo(t2));

    // First column less, second same
    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("bbb"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column greater, second same
    t2 = tf.newTuple();
    t2.append(new DataByteArray("ccc"));
    t2.append(new DataByteArray("bbb"));
    assertTrue("greater than tuple with lesser value", 0 > t1.compareTo(t2));

    // First column less, second greater
    t2 = tf.newTuple();
    t2.append(new DataByteArray("aaa"));
    t2.append(new DataByteArray("ccc"));
    assertTrue("greater than tuple with lesser value", 0 < t1.compareTo(t2));

    // First column greater, second lesser
    t2 = tf.newTuple();
    t2.append(new DataByteArray("ccc"));
    t2.append(new DataByteArray("aaa"));
    assertTrue("less than tuple with greater value", 0 > t1.compareTo(t2));
}
 
Example 16
Source File: PhoenixHBaseStorerIT.java    From phoenix with Apache License 2.0
/**
 * Basic test: writes data to a Phoenix table and compares the data written
 * with the expected values.
 *
 * @throws Exception
 */
@Test
public void testStorer() throws Exception {
    final String tableName = "TABLE1";
    final Statement stmt = conn.createStatement();

    stmt.execute("CREATE TABLE " + tableName +
             " (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR)");

    final Data data = Storage.resetData(pigServer);
    final Collection<Tuple> list = Lists.newArrayList();

    // Create input dataset
    int rows = 100;
    for (int i = 0; i < rows; i++) {
        Tuple t = tupleFactory.newTuple();
        t.append(i);
        t.append("a" + i);
        list.add(t);
    }
    data.set("in", "id:int, name:chararray", list);

    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");

    pigServer.registerQuery("Store A into 'hbase://" + tableName
                           + "' using " + PhoenixHBaseStorage.class.getName() + "('"
                            + zkQuorum + "', '-batchSize 1000');");

     // Now run the Pig script
    if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) {
        throw new RuntimeException("Job failed", pigServer.executeBatch()
                .get(0).getException());
    }

    // Compare data in Phoenix table to the expected
    final ResultSet rs = stmt
            .executeQuery("SELECT id, name FROM table1 ORDER BY id");

    for (int i = 0; i < rows; i++) {
        assertTrue(rs.next());
        assertEquals(i, rs.getInt(1));
        assertEquals("a" +  i, rs.getString(2));
    }
}
 
Example 17
Source File: TestStore.java    From spork with Apache License 2.0
@Test
public void testStoreComplexDataWithNull() throws Exception {
    Tuple inputTuple = GenRandomData.genRandSmallBagTextTupleWithNulls(new Random(), 10, 100);
    inpDB = DefaultBagFactory.getInstance().newDefaultBag();
    inpDB.add(inputTuple);
    storeAndCopyLocally(inpDB);
    PigStorage ps = new PigStorage("\t");
    BufferedReader br = new BufferedReader(new FileReader(outputFileName));
    for(String line=br.readLine();line!=null;line=br.readLine()){
        System.err.println("Complex data: ");
        System.err.println(line);
        String[] flds = line.split("\t",-1);
        Tuple t = new DefaultTuple();

        ResourceFieldSchema stringfs = new ResourceFieldSchema();
        stringfs.setType(DataType.CHARARRAY);
        ResourceFieldSchema intfs = new ResourceFieldSchema();
        intfs.setType(DataType.INTEGER);
        ResourceFieldSchema bytefs = new ResourceFieldSchema();
        bytefs.setType(DataType.BYTEARRAY);

        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);

        ResourceSchema bagSchema = new ResourceSchema();
        bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
        ResourceFieldSchema bagfs = new ResourceFieldSchema();
        bagfs.setSchema(bagSchema);
        bagfs.setType(DataType.BAG);

        ResourceSchema mapSchema = new ResourceSchema();
        mapSchema.setFields(new ResourceFieldSchema[]{bytefs});
        ResourceFieldSchema mapfs = new ResourceFieldSchema();
        mapfs.setSchema(mapSchema);
        mapfs.setType(DataType.MAP);

        t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
        t.append(flds[1].compareTo("")!=0 ? new DataByteArray(flds[1].getBytes()) : null);
        t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
        t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
        t.append(flds[4].compareTo("")!=0 ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
        t.append(flds[5].compareTo("")!=0 ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
        t.append(flds[6].compareTo("")!=0 ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
        t.append(flds[7].compareTo("")!=0 ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
        t.append(flds[8].compareTo("")!=0 ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
        t.append(flds[9].compareTo("")!=0 ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
        t.append(flds[10].compareTo("")!=0 ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
        t.append(flds[11].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[10].getBytes()) : null);
        assertEquals(inputTuple, t);
    }
    br.close();
}
 
Example 18
Source File: PhoenixHBaseStorerIT.java    From phoenix with Apache License 2.0
/**
 * Test storage of DataByteArray columns to Phoenix.
 * Maps each DataByteArray to the target Phoenix data type and persists it in HBase.
 * @throws Exception
 */
@Test
public void testStoreWithBinaryDataTypes() throws Exception {

    final String tableName = "TABLE3";
    final Statement stmt = conn.createStatement();

    stmt.execute("CREATE TABLE " + tableName +
            " (col1 BIGINT NOT NULL, col2 INTEGER , col3 FLOAT, col4 DOUBLE , col5 TINYINT , " +
            "  col6 BOOLEAN , col7 VARBINARY CONSTRAINT my_pk PRIMARY KEY (col1))");

    final Data data = Storage.resetData(pigServer);
    final Collection<Tuple> list = Lists.newArrayList();

    int rows = 10;
    for (int i = 1; i <= rows; i++) {
        Tuple t = tupleFactory.newTuple();
        t.append(i);
        t.append(new DataByteArray(Bytes.toBytes(i * 5)));
        t.append(new DataByteArray(Bytes.toBytes(i * 10.0F)));
        t.append(new DataByteArray(Bytes.toBytes(i * 15.0D)));
        t.append(new DataByteArray(Bytes.toBytes(i)));
        t.append(new DataByteArray(Bytes.toBytes( i % 2 == 0)));
        t.append(new DataByteArray(Bytes.toBytes(i)));
        list.add(t);
    }
    data.set("in", "col1:int,col2:bytearray,col3:bytearray,col4:bytearray,col5:bytearray,col6:bytearray,col7:bytearray ", list);

    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");

    pigServer.registerQuery("Store A into 'hbase://" + tableName
                           + "' using " + PhoenixHBaseStorage.class.getName() + "('"
                            + zkQuorum + "', '-batchSize 1000');");

    if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) {
        throw new RuntimeException("Job failed", pigServer.executeBatch()
                .get(0).getException());
    }

    final ResultSet rs = stmt
            .executeQuery(String.format("SELECT col1 , col2 , col3 , col4 , col5 , col6, col7  FROM %s ORDER BY col1" , tableName));

    int count = 0;
    for (int i = 1; i <= rows; i++) {
        assertTrue(rs.next());
        assertEquals(i, rs.getInt(1));
        assertEquals(i * 5, rs.getInt(2));
        assertEquals(i * 10.0F, rs.getFloat(3),0.0);
        assertEquals(i * 15.0D, rs.getInt(4),0.0);
        assertEquals(i,rs.getInt(5));
        assertEquals(i % 2 == 0, rs.getBoolean(6));
        assertArrayEquals(Bytes.toBytes(i), rs.getBytes(7));
        count++;
    }
    assertEquals(rows, count);
 }
 
Example 19
Source File: PhoenixHBaseStorerIT.java    From phoenix with Apache License 2.0
@Test
public void testStoreWithDateTime() throws Exception {

    final String tableName = "TABLE4";
    final Statement stmt = conn.createStatement();

    stmt.execute("CREATE TABLE " + tableName +
            " (col1 BIGINT NOT NULL, col2 DATE , col3 TIME, " +
            " col4 TIMESTAMP CONSTRAINT my_pk PRIMARY KEY (col1))");

    long now = System.currentTimeMillis();
    final DateTime dt = new DateTime(now);
    
    final Data data = Storage.resetData(pigServer);
    final Collection<Tuple> list = Lists.newArrayList();
    Tuple t = tupleFactory.newTuple();
    
    t.append(1);
    t.append(dt);
    t.append(dt);
    t.append(dt);
   
    list.add(t);
    
    data.set("in", "col1:int,col2:datetime,col3:datetime,col4:datetime", list);

    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");

    pigServer.registerQuery("Store A into 'hbase://" + tableName
                           + "' using " + PhoenixHBaseStorage.class.getName() + "('"
                            + zkQuorum + "', '-batchSize 1000');");

    if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) {
        throw new RuntimeException("Job failed", pigServer.executeBatch()
                .get(0).getException());
    }

    final ResultSet rs = stmt
            .executeQuery(String.format("SELECT col1 , col2 , col3 , col4 FROM %s " , tableName));

    assertTrue(rs.next());
    assertEquals(1, rs.getInt(1));
    assertEquals(now, rs.getDate(2).getTime());
    assertEquals(now, rs.getTime(3).getTime());
    assertEquals(now, rs.getTimestamp(4).getTime());
 
}
 
Example 20
Source File: TestPOUserFunc.java    From spork with Apache License 2.0
public void algebraicAVG(
        Integer[] input,
        Double initialExpectedSum, Long initialExpectedCount,
        Double intermedExpectedSum, Long intermedExpectedCount,
        Double expectedAvg) throws IOException, ExecException {

    // generate data
    byte INIT = 0;
    byte INTERMED = 1;
    byte FINAL = 2;
    Tuple tup1 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1),
            input);
    Tuple tup2 = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1),
            input);
    // System.out.println("Input = " + tup1);
    String funcSpec = AVG.class.getName() + "()";

    POUserFunc po = new POUserFunc(new OperatorKey("", r.nextLong()), -1,
            null, new FuncSpec(funcSpec));

    //************ Initial Calculations ******************
    TupleFactory tf = TupleFactory.getInstance();
    po.setAlgebraicFunction(INIT);
    po.attachInput(tup1);
    Tuple t = null;
    Result res = po.getNextTuple();
    Tuple outputInitial1 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
            : null;
    Tuple outputInitial2 = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
            : null;
    System.out.println(outputInitial1 + " " + outputInitial2);
    assertEquals(outputInitial1, outputInitial2);
    Double sum = (Double) outputInitial1.get(0);
    Long count = (Long) outputInitial1.get(1);
    assertEquals(initialExpectedSum, sum);
    assertEquals(initialExpectedCount, count);

    //************ Intermediate Data and Calculations ******************
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    bag.add(outputInitial1);
    bag.add(outputInitial2);
    Tuple outputInitial = tf.newTuple();
    outputInitial.append(bag);
    // Tuple outputIntermed = intermed.exec(outputInitial);
    po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null,
            new FuncSpec(funcSpec));
    po.setAlgebraicFunction(INTERMED);
    po.attachInput(outputInitial);
    res = po.getNextTuple();
    Tuple outputIntermed = (res.returnStatus == POStatus.STATUS_OK) ? (Tuple) res.result
            : null;

    sum = (Double) outputIntermed.get(0);
    count = (Long) outputIntermed.get(1);
    assertEquals(intermedExpectedSum, sum);
    assertEquals(intermedExpectedCount, count);
    System.out.println(outputIntermed);

    //************ Final Calculations ******************
    po = new POUserFunc(new OperatorKey("", r.nextLong()), -1, null,
            new FuncSpec(funcSpec));
    po.setAlgebraicFunction(FINAL);
    po.attachInput(outputInitial);
    res = po.getNextTuple();
    Double output = (res.returnStatus == POStatus.STATUS_OK) ? (Double) res.result
            : null;
    // Double output = fin.exec(outputInitial);
    assertEquals((Double) expectedAvg, output);
    // System.out.println("output = " + output);
}