org.apache.pig.data.TupleFactory Java Examples

The following examples show how to use org.apache.pig.data.TupleFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DictionaryRefreshReduceSideOperator.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Obtains the dictionary map, allocates the three-field output tuple and looks up the
 * input block. When the JSON has a "dictionary" entry the map is loaded from the path
 * that {@code FileCache} returns for that name; otherwise an empty
 * {@code CodeDictionary} is registered for every name in the comma-separated "columns"
 * entry.
 *
 * @see com.linkedin.cubert.operator.TupleOperator#setInput(java.util.Map,
 *      org.codehaus.jackson.JsonNode, com.linkedin.cubert.block.BlockProperties)
 */
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
        InterruptedException
{
    if (!json.has("dictionary"))
    {
        // no dictionary file configured: start from one empty dictionary per column
        dictionaryMap = new HashMap<String, CodeDictionary>();
        for (String columnName : JsonUtils.getText(json, "columns").split(","))
        {
            dictionaryMap.put(columnName, new CodeDictionary());
        }
    }
    else
    {
        // load the dictionary from file
        String cachedPath = FileCache.get(JsonUtils.getText(json, "dictionary"));
        dictionaryMap = GenerateDictionary.loadDictionary(cachedPath, false, null);
    }

    output = TupleFactory.getInstance().newTuple(3);

    // only the first entry of the "input" array is used
    block = input.get(JsonUtils.asArray(json, "input")[0]);
    dictionaryUpdated = false;
}
 
Example #2
Source File: TestAggregators.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code agg} over a single-column block built from {@code input} and returns the
 * one-field tuple the aggregator writes its result into.
 *
 * @param input rows of the block; the block is created with the single column "value"
 * @param agg the aggregation function under test
 * @param outputType type used to declare the "agg" output column
 * @return the tuple passed to {@code agg.output(...)}
 */
private Tuple runAgg(Object[][] input, AggregationFunction agg, DataType outputType)
    throws IOException, InterruptedException
{
    Block dataBlock = new ArrayBlock(Arrays.asList(input), new String[] { "value" });

    // aggregator configuration: input column "value", output column "agg"
    ObjectNode config = new ObjectMapper().createObjectNode();
    config.put("input", "value");
    config.put("output", "agg");

    agg.setup(dataBlock, new BlockSchema(outputType.toString() + " agg"), config);
    agg.resetState();

    // next() returning null ends the scan
    for (Tuple row = dataBlock.next(); row != null; row = dataBlock.next())
    {
        agg.aggregate(row);
    }

    Tuple result = TupleFactory.getInstance().newTuple(1);
    agg.output(result);
    return result;
}
 
Example #3
Source File: TestPODistinct.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Fills the input bag with MAX_SAMPLES random-int tuples, each followed by a tuple
 * holding a single null, then runs the shared distinct check.
 */
@Test
public void testPODistictWithIntAndNullValues() throws ExecException {
    input = BagFactory.getInstance().newDefaultBag();
    TupleFactory factory = TupleFactory.getInstance();

    for (int sample = 0; sample < MAX_SAMPLES; sample++) {
        Tuple randomInt = factory.newTuple();
        randomInt.append(r.nextInt(MAX_VALUE));
        input.add(randomInt);

        Tuple nullOnly = factory.newTuple();
        nullOnly.append(null);
        input.add(nullOnly);
    }

    confirmDistinct();
}
 
Example #4
Source File: WeightedReservoirSamplingTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates 100 single-tuple bags holding (i, i + 1) into a sampler constructed with
 * ("10", "1") and hands the result to verifyNoRepeatAllFound.
 */
@Test
public void weightedReservoirSampleAccumulateTest() throws IOException
{
   WeightedReservoirSample sampler = new WeightedReservoirSample("10", "1");
   TupleFactory tupleFactory = TupleFactory.getInstance();
   BagFactory bagFactory = BagFactory.getInstance();

   int i = 0;
   while (i < 100)
   {
     Tuple pair = tupleFactory.newTuple(2);
     pair.set(0, i);
     pair.set(1, i + 1);

     // each accumulate call receives a tuple wrapping a one-element bag
     DataBag bag = bagFactory.newDefaultBag();
     bag.add(pair);
     sampler.accumulate(tupleFactory.newTuple(bag));

     i++;
   }

   DataBag result = sampler.getValue();
   verifyNoRepeatAllFound(result, 10, 0, 100);
}
 
Example #5
Source File: BuildBloom.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh bloom filter containing one key derived from the first tuple of the
 * bag in field 0, and returns the filter output wrapped in a tuple.
 *
 * @param input tuple whose first field is a bag
 * @return null when {@code input} is null or empty, otherwise a tuple built from bloomOut()
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    boolean nothingToHash = (input == null) || (input.size() == 0);
    if (nothingToHash) {
        return null;
    }

    // Strip off the initial level of bag; only its first tuple is read
    Tuple first = ((DataBag) input.get(0)).iterator().next();

    // A single-field tuple is keyed on that field alone; anything wider is
    // serialized as a whole tuple.
    byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    Key key = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
 
Example #6
Source File: TestProject.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects every adjacent column pair (j, j + 1) of tRandomAndNull and checks that the
 * projection returns STATUS_OK with a tuple made of exactly those two fields.
 */
@Test
public void testGetNextMultipleProjectionsWithNull() throws ExecException, IOException {
    t = tRandomAndNull;
    ArrayList<Integer> projectedColumns = new ArrayList<Integer>();
    proj.attachInput(t);

    int left = 0;
    while (left < t.size() - 1) {
        int right = left + 1;

        proj.attachInput(t);
        projectedColumns.add(left);
        projectedColumns.add(right);
        proj.setColumns(projectedColumns);

        res = proj.getNext();

        ArrayList<Object> expectedFields = new ArrayList<Object>();
        expectedFields.add(t.get(left));
        expectedFields.add(t.get(right));
        Tuple expectedResult = TupleFactory.getInstance().newTuple(expectedFields);

        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(expectedResult, res.result);

        // reuse the same column list for the next pair
        projectedColumns.clear();
        left++;
    }
}
 
Example #7
Source File: CountEach.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Emits one tuple per counted key. With {@code flatten} set, the count is appended to a
 * copy of the key's fields; otherwise the output tuple holds the key copy and the count
 * as two separate fields.
 *
 * @return a new bag with one tuple per entry in {@code counts}
 */
@Override
public DataBag getValue()
{
  TupleFactory tupleFactory = TupleFactory.getInstance();
  DataBag result = BagFactory.getInstance().newDefaultBag();

  for (Tuple key : counts.keySet()) {
    // build from the key's field list rather than appending to the key itself
    Tuple keyCopy = tupleFactory.newTuple(key.getAll());

    Tuple row;
    if (!flatten) {
      row = tupleFactory.newTuple();
      row.append(keyCopy);
      row.append(counts.get(key));
    } else {
      keyCopy.append(counts.get(key));
      row = keyCopy;
    }
    result.add(row);
  }

  return result;
}
 
Example #8
Source File: TestNotEqualTo.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two constant tuples holding different strings with the not-equal operator
 * and expects STATUS_OK with a true result.
 */
@Test
public void testTupleNe() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    Tuple leftTuple = tupleFactory.newTuple("item_1");
    Tuple rightTuple = tupleFactory.newTuple("item_2");

    ConstantExpression lhs = GenPhyOp.exprConst();
    lhs.setValue(leftTuple);
    ConstantExpression rhs = GenPhyOp.exprConst();
    rhs.setValue(rightTuple);

    NotEqualToExpr notEqual = GenPhyOp.compNotEqualToExpr();
    notEqual.setLhs(lhs);
    notEqual.setRhs(rhs);
    notEqual.setOperandType(DataType.TUPLE);

    Result outcome = notEqual.getNextBoolean();
    assertEquals(POStatus.STATUS_OK, outcome.returnStatus);
    assertTrue((Boolean) outcome.result);
}
 
Example #9
Source File: StorageUtil.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Splits a region of a byte array on a delimiter byte and packs the resulting fields
 * into a <code>Tuple</code>.
 * <p>
 * The scan covers indexes {@code offset} (inclusive) up to {@code length} (exclusive),
 * so {@code length} is used as an end index into {@code buf}; it equals a byte count
 * only when {@code offset} is 0.
 *
 * @param buf the byte array
 * @param offset index of the first byte to examine
 * @param length index at which scanning stops (exclusive)
 * @param fieldDel the field delimiter
 * @return tuple constructed from the bytes
 */
public static Tuple bytesToTuple(byte[] buf, int offset, int length, byte fieldDel) {
    ArrayList<Object> fields = new ArrayList<Object>();

    int fieldStart = offset;
    int pos = offset;
    while (pos < length) {
        if (buf[pos] == fieldDel) {
            readField(fields, buf, fieldStart, pos);
            fieldStart = pos + 1;
        }
        pos++;
    }

    // pick up the last field (also reached when the data ends on a delimiter)
    if (fieldStart <= length) {
        readField(fields, buf, fieldStart, length);
    }

    return TupleFactory.getInstance().newTupleNoCopy(fields);
}
 
Example #10
Source File: COR.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * For every pair of bags (i, j) with i &lt; j in {@code input}, stores the result of
 * computeAll(first, second) followed by the size of the first bag.
 *
 * @param input tuple whose fields are cast to bags
 * @return null for a null or empty input or when processing fails; otherwise a tuple
 *         with two slots per bag pair
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    final int bagCount = input.size();
    // n*(n-1)/2 pairs, two slots each
    Tuple result = TupleFactory.getInstance().newTuple(bagCount * (bagCount - 1));
    try {
        int slot = 0;
        for (int i = 0; i < bagCount; i++) {
            for (int j = i + 1; j < bagCount; j++) {
                DataBag first = (DataBag) input.get(i);
                DataBag second = (DataBag) input.get(j);
                result.set(slot++, computeAll(first, second));
                result.set(slot++, (Long) first.size());
            }
        }
    } catch (Exception e) {
        // best effort: report the failure and yield null for this record
        System.err.println("Failed to process input record; error - " + e.getMessage());
        return null;
    }
    return result;
}
 
Example #11
Source File: PigAvroDatumReader.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Called to read a record instance. Overridden to build a pig tuple: each field is read
 * in the order the decoder reports and stored at the field's own position.
 */
@Override
protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException {
    // the ResolvingDecoder dictates the order in which field values arrive
    Field[] fieldsInReadOrder = in.readFieldOrder();
    int fieldCount = fieldsInReadOrder.length;

    Tuple record = TupleFactory.getInstance().newTuple(fieldCount);

    // values arrive in read order but are placed by f.pos(), i.e. output order
    for (int i = 0; i < fieldCount; i++) {
        Field field = fieldsInReadOrder[i];
        record.set(field.pos(), read(old, field.schema(), in));
    }

    return record;
}
 
Example #12
Source File: BagTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Drives TupleFromBag through three accumulate/getValue/cleanup rounds, passing
 * (bag, index, defaultValue) argument tuples with index 0, 1 and 2.
 * <p>
 * NOTE(review): the int arguments given to newTuple(...) presumably bind to the
 * size-taking overload rather than creating a single-value tuple — confirm against the
 * TupleFactory API.
 */
@Test
public void tupleFromBagAccumulateTest() throws Exception
{
  TupleFactory tf = TupleFactory.getInstance();
  BagFactory bf = BagFactory.getInstance();

  TupleFromBag op = new TupleFromBag();
  Tuple defaultValue = tf.newTuple(1000);

  // round 1: index 0
  for (int arg : new int[] { 4, 9, 16 })
  {
    op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(Arrays.asList(tf.newTuple(arg))), 0, defaultValue)));
  }
  assertEquals(op.getValue(), tf.newTuple(4));
  op.cleanup();

  // round 2: index 1
  for (int arg : new int[] { 11, 17, 5 })
  {
    op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(Arrays.asList(tf.newTuple(arg))), 1, defaultValue)));
  }
  assertEquals(op.getValue(), tf.newTuple(17));
  op.cleanup();

  // round 3: empty bag, the default value is expected back
  op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(), 2, defaultValue)));
  assertEquals(op.getValue(), defaultValue);
  op.cleanup();
}
 
Example #13
Source File: BagTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Drives FirstTupleFromBag through three accumulate/getValue/cleanup rounds, passing
 * (bag, defaultValue) argument tuples; the last round uses an empty bag.
 * <p>
 * NOTE(review): the int arguments given to newTuple(...) presumably bind to the
 * size-taking overload rather than creating a single-value tuple — confirm against the
 * TupleFactory API.
 */
@Test
public void firstTupleFromBagAccumulateTest() throws Exception
{
  TupleFactory tf = TupleFactory.getInstance();
  BagFactory bf = BagFactory.getInstance();

  FirstTupleFromBag op = new FirstTupleFromBag();
  Tuple defaultValue = tf.newTuple(1000);

  // round 1
  for (int arg : new int[] { 4, 9, 16 })
  {
    op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(Arrays.asList(tf.newTuple(arg))), defaultValue)));
  }
  assertEquals(op.getValue(), tf.newTuple(4));
  op.cleanup();

  // round 2
  for (int arg : new int[] { 11, 17, 5 })
  {
    op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(Arrays.asList(tf.newTuple(arg))), defaultValue)));
  }
  assertEquals(op.getValue(), tf.newTuple(11));
  op.cleanup();

  // round 3: empty bag, the default value is expected back
  op.accumulate(tf.newTuple(Arrays.asList(bf.newDefaultBag(), defaultValue)));
  assertEquals(op.getValue(), defaultValue);
  op.cleanup();
}
 
Example #14
Source File: Util.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a tuple whose fields are the given arguments converted to DataByteArray.
 * A null argument, or one whose conversion raises ExecException, becomes a null field.
 *
 * @param args values to convert, in field order
 * @return tuple created from the converted values
 */
static public Tuple buildBinTuple(final Object... args) throws IOException {
    Function<Object, DataByteArray> toByteArray = new Function<Object, DataByteArray>() {
        @Override
        public DataByteArray apply(Object value) {
            if (value == null) {
                return null;
            }
            DataByteArray converted;
            try {
                converted = new DataByteArray(DataType.toBytes(value));
            } catch (ExecException e) {
                // conversion failures are mapped to a null field
                converted = null;
            }
            return converted;
        }
    };

    return TupleFactory.getInstance().newTuple(
            Lists.transform(Lists.newArrayList(args), toByteArray));
}
 
Example #15
Source File: TestEqualTo.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two constant tuples holding different strings with the equal-to operator
 * and expects STATUS_OK with a false result.
 */
@Test
public void testTupleNe() throws ExecException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    Tuple leftTuple = tupleFactory.newTuple("item_1");
    Tuple rightTuple = tupleFactory.newTuple("item_2");

    ConstantExpression lhs = GenPhyOp.exprConst();
    lhs.setValue(leftTuple);
    ConstantExpression rhs = GenPhyOp.exprConst();
    rhs.setValue(rightTuple);

    EqualToExpr equalTo = GenPhyOp.compEqualToExpr();
    equalTo.setLhs(lhs);
    equalTo.setRhs(rhs);
    equalTo.setOperandType(DataType.TUPLE);

    Result outcome = equalTo.getNextBoolean();
    assertEquals(POStatus.STATUS_OK, outcome.returnStatus);
    assertFalse((Boolean) outcome.result);
}
 
Example #16
Source File: NGramGenerator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Splits the string in field 0 into words, builds the set of n-grams up to
 * {@code _ngramSizeLimit}, and returns them as a bag of one-field tuples.
 *
 * @param input tuple whose first field is the query string
 * @return null for a null or empty input or when processing fails; otherwise the n-gram bag
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        DataBag ngramBag = DefaultBagFactory.getInstance().newDefaultBag();

        String[] words = TutorialUtil.splitToWords((String) input.get(0));
        Set<String> distinctNgrams = new HashSet<String>();
        TutorialUtil.makeNGram(words, distinctNgrams, _ngramSizeLimit);

        TupleFactory tupleFactory = TupleFactory.getInstance();
        for (String ngram : distinctNgrams) {
            Tuple single = tupleFactory.newTuple(1);
            single.set(0, ngram);
            ngramBag.add(single);
        }
        return ngramBag;
    } catch (Exception e) {
        // best effort: report the failure and yield null for this record
        System.err.println("NGramGenerator: failed to process input; error - " + e.getMessage());
        return null;
    }
}
 
Example #17
Source File: POCounterStatsTez.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the per-task record counts from {@code reader}, converts them into starting
 * offsets keyed by task id, writes the offsets map as a single-field tuple and signals
 * end of processing.
 *
 * @return RESULT_EOP once the offsets tuple has been written
 * @throws ExecException wrapping any IOException raised while reading or writing
 */
@Override
public Result getNextTuple() throws ExecException {
    try {
        // Read count of records per task
        Map<Integer, Long> recordsPerTask = new HashMap<Integer, Long>();
        while (reader.next()) {
            Integer taskId = ((IntWritable) reader.getCurrentKey()).get();
            for (Object current : reader.getCurrentValues()) {
                Long recordCount = ((LongWritable) current).get();
                // a later value for the same task replaces the earlier one
                recordsPerTask.put(taskId, recordCount);
            }
        }

        // BinInterSedes only takes String for map key
        // Create a map to contain task ids and beginning offset of record count
        // based on total record count of all tasks
        // For eg: If Task 0 has 5 records, Task 1 has 10 records and Task 2 has 3 records
        // map will contain {0=0, 1=5, 2=15}
        Map<String, Long> counterOffsets = new HashMap<String, Long>();
        counterOffsets.put("0", 0L);

        Long runningTotal = recordsPerTask.get(0);
        for (int task = 1; task < recordsPerTask.size(); task++) {
            counterOffsets.put(String.valueOf(task), runningTotal);
            runningTotal = runningTotal + recordsPerTask.get(task);
        }

        Tuple offsetsTuple = TupleFactory.getInstance().newTuple(1);
        offsetsTuple.set(0, counterOffsets);
        writer.write(POValueOutputTez.EMPTY_KEY, offsetsTuple);
        return RESULT_EOP;
    } catch (IOException e) {
        throw new ExecException(e);
    }
}
 
Example #18
Source File: TestDiffDateTime.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that ISOSecondsBetween reports 221443200 seconds between
 * 2009-01-07 and 2002-01-01 (both midnight, Z).
 */
@Test
public void testSecondsDiff() throws Exception {
    Tuple isoPair = TupleFactory.getInstance().newTuple(2);
    isoPair.set(0, "2009-01-07T00:00:00.000Z");
    isoPair.set(1, "2002-01-01T00:00:00.000Z");

    Long secs = new ISOSecondsBetween().exec(isoPair);

    System.out.println("Seconds: " + secs.toString());

    final long expectedSeconds = 221443200L;
    Assert.assertTrue(expectedSeconds == secs);
}
 
Example #19
Source File: TestPOBinCond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bag of ten tuples. Each tuple starts with one value chosen by {@code type}
 * (none when the type is not one of those handled below) followed by the constants 1 and 0.
 *
 * @param type a DataType constant selecting the kind of leading value
 * @return the populated bag
 */
private DataBag getBag(byte type) {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

    for (int i = 0; i < 10; i++) {
        boolean evenRow = (i % 2 == 0);
        Tuple t = tupleFactory.newTuple();

        if (type == DataType.BOOLEAN) {
            t.append(r.nextBoolean());
        } else if (type == DataType.INTEGER) {
            t.append(r.nextInt(2));
        } else if (type == DataType.LONG) {
            t.append(r.nextLong() % 2L);
        } else if (type == DataType.FLOAT) {
            t.append(evenRow ? 1.0f : 0.0f);
        } else if (type == DataType.DOUBLE) {
            t.append(evenRow ? 1.0 : 0.0);
        } else if (type == DataType.DATETIME) {
            t.append(new DateTime(r.nextLong() % 2L));
        }
        // any other type contributes no leading field

        t.append(1);
        t.append(0);
        bag.add(t);
    }
    return bag;
}
 
Example #20
Source File: TestTruncateDateTime.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that ISOToHour maps 08:11:33.020 on 2010-04-15 (Z) to 08:00:00.000 of the same day.
 */
@Test
public void testToHour() throws Exception {
    Tuple isoInput = TupleFactory.getInstance().newTuple(1);
    isoInput.set(0, "2010-04-15T08:11:33.020Z");

    String truncated = new ISOToHour().exec(isoInput);

    assertEquals("2010-04-15T08:00:00.000Z", truncated);
}
 
Example #21
Source File: HashJoinOperator.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the fields of {@code inputTuple} selected by {@code indices} into positions
 * 0..indices.length-1 of a key tuple.
 *
 * @param inputTuple tuple to read from
 * @param indices source field positions, in output order
 * @param makeNewObject when true a new tuple sized by {@code leftBlockColumns.length}
 *        is allocated; when false the shared {@code keyTuple} is overwritten and returned
 * @return the tuple that received the projected fields
 */
Tuple getProjectedKeyTuple(Tuple inputTuple, int[] indices, boolean makeNewObject) throws ExecException
{
    Tuple projected = makeNewObject
            ? TupleFactory.getInstance().newTuple(leftBlockColumns.length)
            : keyTuple;

    int position = 0;
    for (int sourceIndex : indices)
    {
        projected.set(position++, inputTuple.get(sourceIndex));
    }

    return projected;
}
 
Example #22
Source File: TestTruncateDateTime.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * When no time zone is specified at all, we use the default: the parsed value must
 * equal the same wall-clock time in {@code DateTimeZone.getDefault()}.
 * @throws ExecException declared for the tuple and parsing calls used here
 */
@Test
public void testParseDateTime_NoTimeZone() throws ExecException {
    Tuple isoInput = TupleFactory.getInstance().newTuple(1);
    isoInput.set(0, "2010-04-15T08:11:33.020");

    // expected value is expressed in the default zone
    DateTime expected = new DateTime(2010, 4, 15, 8, 11, 33, 20, DateTimeZone.getDefault());
    assertEquals(expected, ISOHelper.parseDateTime(isoInput));
}
 
Example #23
Source File: ToTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Copies every field of {@code input}, in order, into a newly created tuple.
 *
 * @param input tuple whose fields are copied
 * @return the new tuple
 * @throws RuntimeException wrapping any exception raised while copying
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        List<Object> fields = new ArrayList<Object>();
        int fieldCount = input.size();
        int idx = 0;
        while (idx < fieldCount) {
            fields.add(input.get(idx));
            idx++;
        }
        return TupleFactory.getInstance().newTuple(fields);
    } catch (Exception e) {
        throw new RuntimeException("Error while creating a tuple", e);
    }
}
 
Example #24
Source File: TestStitch.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stitches a one-row bag ("a", "b") with a two-row bag ("1", "2"), ("3", "4") and
 * expects a single output row ("a", "b", "1", "2").
 */
@Test
public void testFirstShort() throws Exception {
    Stitch func = new Stitch();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    BagFactory bagFactory = BagFactory.getInstance();

    // first bag: one row
    DataBag b1 = bagFactory.newDefaultBag();
    Tuple letters = tupleFactory.newTuple();
    letters.append("a");
    letters.append("b");
    b1.add(letters);

    // second bag: two rows
    DataBag b2 = bagFactory.newDefaultBag();
    Tuple firstDigits = tupleFactory.newTuple();
    firstDigits.append("1");
    firstDigits.append("2");
    b2.add(firstDigits);
    Tuple secondDigits = tupleFactory.newTuple();
    secondDigits.append("3");
    secondDigits.append("4");
    b2.add(secondDigits);

    Tuple args = tupleFactory.newTuple();
    args.append(b1);
    args.append(b2);

    DataBag out = func.exec(args);
    assertEquals(1, out.size());

    Tuple stitched = out.iterator().next();
    assertEquals(4, stitched.size());
    assertEquals("a", stitched.get(0));
    assertEquals("b", stitched.get(1));
    assertEquals("1", stitched.get(2));
    assertEquals("2", stitched.get(3));
}
 
Example #25
Source File: TestBuiltin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks ROUND against Math.round for a Double and a Float input, then against a table
 * of literal double inputs including the .5 boundary cases.
 */
@Test
public void testROUND() throws Exception {
    EvalFunc<Long> rounder = new ROUND();
    Tuple tup = TupleFactory.getInstance().newTuple(1);
    long expected, lng_out;

    Double dbl = 0.987654321d;
    tup.set(0, dbl);
    expected = Math.round(dbl);
    lng_out = rounder.exec(tup);
    assertEquals(expected, lng_out);

    Float flt = 0.987654321f;
    tup.set(0, flt);
    expected = Math.round(flt);
    lng_out = rounder.exec(tup);
    assertEquals(expected, lng_out);

    // The last four entries are the half-way cases.
    // Rounds towards positive infinity: round(x) = floor(x + 0.5)
    double[] inputs  = { 4.6d, 2.4d, 1.0d, -1.0d, -2.4d, -4.6d, 3.5d, -3.5d, 2.5d, -2.5d };
    long[] wanted    = {   5L,   2L,   1L,   -1L,   -2L,   -5L,   4L,   -3L,   3L,   -2L };
    for (int i = 0; i < inputs.length; i++) {
        tup.set(0, inputs[i]);
        lng_out = rounder.exec(tup);
        assertEquals(wanted[i], lng_out);
    }

    // we don't need to test null input because of SKIP_UDF_CALL_FOR_NULL behavior
}
 
Example #26
Source File: TestHashFNV.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks HashFNV2 against three (string, int) inputs, one of them with a null string,
 * and HashFNV1 against a single-string input.
 */
@Test
public void testHashFNV() throws Exception {
    TupleFactory tupleFactory = TupleFactory.getInstance();

    Tuple first = tupleFactory.newTuple(2);
    first.set(0, "0000000000065&f=a&br=65");
    first.set(1, 10000);

    Tuple second = tupleFactory.newTuple(2);
    second.set(0, "024ulhl0dq1tl&b=2");
    second.set(1, 100);

    Tuple nullKey = tupleFactory.newTuple(2);
    nullKey.set(0, null);
    nullKey.set(1, 100);

    Tuple keyOnly = tupleFactory.newTuple(1);
    keyOnly.set(0, "024ulhl0dq1tl&b=2");

    HashFNV2 func2 = new HashFNV2();
    Long firstHash = func2.exec(first);
    assertTrue(firstHash == 6228);
    Long secondHash = func2.exec(second);
    assertTrue(secondHash == 31);
    // a null string is expected to produce a null hash
    Long nullKeyHash = func2.exec(nullKey);
    assertTrue(nullKeyHash == null);

    HashFNV1 func1 = new HashFNV1();
    Long keyOnlyHash = func1.exec(keyOnly);
    assertTrue(keyOnlyHash == 1669505231);
}
 
Example #27
Source File: TestEvalPipeline.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a fixed map with one entry per value kind: string, int, long, float, double,
 * byte array ("dba"), nested map, tuple and bag. The input tuple is not read.
 * <p>
 * Boxed values are obtained through {@code valueOf} and strings through plain literals
 * instead of the deprecated wrapper constructors and redundant {@code new String(...)};
 * the stored values are equal to the ones the constructors produced.
 *
 * @param input ignored
 * @return the populated map
 */
@Override
public Map<String, Object> exec(Tuple input) throws IOException {

    ArrayList<Object> objList = new ArrayList<Object>();
    objList.add(Integer.valueOf(1));
    objList.add(Double.valueOf(1.0));
    objList.add(Float.valueOf(1.0f));
    objList.add("World!");
    Tuple tuple = TupleFactory.getInstance().newTuple(objList);

    // the bag holds the same tuple instance that is stored under "tuple"
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    bag.add(tuple);

    Map<String, Object> mapInMap = new HashMap<String, Object>();
    mapInMap.put("int", Integer.valueOf(10));
    mapInMap.put("float", Float.valueOf(10.0f));

    Map<String, Object> myMap = new HashMap<String, Object>();
    myMap.put("string", "Hello");
    myMap.put("int", Integer.valueOf(1));
    myMap.put("long", Long.valueOf(1L));
    myMap.put("float", Float.valueOf(1.0f));
    myMap.put("double", Double.valueOf(1.0));
    // getBytes() keeps using the platform default charset, as before
    myMap.put("dba", new DataByteArray("bytes".getBytes()));
    myMap.put("map", mapInMap);
    myMap.put("tuple", tuple);
    myMap.put("bag", bag);
    return myMap;
}
 
Example #28
Source File: TestStreamingLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the same 10000-line input twice, streams each relation through `head -1`,
 * joins the two on a0 and expects exactly one row: (A, 0, A, 0).
 */
@Test
public void testJoinTwoStreamingRelations()
throws Exception {
    ArrayList<String> lines = new ArrayList<String>();
    for (int i = 0; i < 10000; i++) {
        lines.add("A," + i);
    }
    File input = Util.createInputFile("tmp", "", lines.toArray(new String[0]));

    // Expected results
    Tuple expected = TupleFactory.getInstance().newTuple(4);
    expected.set(0, "A");
    expected.set(1, 0);
    expected.set(2, "A");
    expected.set(3, 0);
    String expectedText = expected.toString();

    // both load statements share the same storage clause and schema
    String storageClause = "' using " + PigStorage.class.getName() + "(',') as (a0, a1);";

    pigServer.registerQuery("A = load '"
            + Util.generateURI(input.toString(), pigServer.getPigContext())
            + storageClause);
    pigServer.registerQuery("B = stream A through `head -1` as (a0, a1);");
    pigServer.registerQuery("C = load '"
            + Util.generateURI(input.toString(), pigServer.getPigContext())
            + storageClause);
    pigServer.registerQuery("D = stream C through `head -1` as (a0, a1);");
    pigServer.registerQuery("E = join B by a0, D by a0;");

    int count = 0;
    for (Iterator<Tuple> iter = pigServer.openIterator("E"); iter.hasNext(); count++) {
        Assert.assertEquals(expectedText, iter.next().toString());
    }
    Assert.assertTrue(count == 1);
}
 
Example #29
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bag of {@code row} tuples, each holding {@code column} values computed as
 * {@code r.nextFloat() * 1000}.
 *
 * @param r source of random floats
 * @param column number of fields per tuple
 * @param row number of tuples in the bag
 * @return the populated bag
 */
public static DataBag genFloatDataBag(Random r, int column, int row) {
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    TupleFactory tupleFactory = TupleFactory.getInstance();

    for (int rowIdx = 0; rowIdx < row; rowIdx++) {
        Tuple current = tupleFactory.newTuple();
        int filled = 0;
        while (filled < column) {
            current.append(r.nextFloat() * 1000);
            filled++;
        }
        bag.add(current);
    }
    return bag;
}
 
Example #30
Source File: TestTruncateDateTime.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parsing ISO date with a time zone but no time will throw an exception:
 * the test passes only when parseDateTime raises IllegalArgumentException.
 * @throws ExecException declared for the tuple and parsing calls used here
 */
@Test
public void testParseDateTime_noTime_UTC() throws ExecException {
    Tuple dateOnlyWithZone = TupleFactory.getInstance().newTuple(1);
    dateOnlyWithZone.set(0, "2010-04-15Z");

    boolean rejected = false;
    try {
        ISOHelper.parseDateTime(dateOnlyWithZone);
    } catch (IllegalArgumentException expected) {
        // This is expected.
        rejected = true;
    }

    if (!rejected) {
        fail("ISO date with a time zone but no time should not be parsable.");
    }
}