Java Code Examples for org.apache.pig.data.Tuple#get()

The following examples show how to use org.apache.pig.data.Tuple#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Reverse.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reverses the string held in the first field of the input tuple.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the reversed string; {@code null} when the tuple is null or empty,
 *         when the first field is null, or when reading the field raises an
 *         {@code ExecException} (a warning is recorded in that case)
 * @throws IOException declared by the signature; not thrown directly here
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        String str = (String) input.get(0);
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return str;
        }
        // StringBuilder.reverse() treats a surrogate pair as one unit, so
        // supplementary characters survive the reversal. Swapping raw chars
        // would leave the low surrogate in front of the high one.
        return new StringBuilder(str).reverse().toString();
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }
}
 
Example 2
Source File: PigStreamingUDF.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a tuple into the {@code out} buffer: the buffer is reset, each
 * field is written through {@code StorageUtil.putField}, fields are separated
 * by the parameter delimiter, and the record-end delimiter is appended.
 * A null tuple is written as a record with no fields.
 *
 * @param t the tuple to serialize; may be null
 * @return the {@code out} buffer holding the serialized record
 * @throws IOException if reading a field or writing to the buffer fails
 */
@Override
public WritableByteArray serializeToBytes(Tuple t) throws IOException {
    out.reset();
    final int fieldCount = (t == null) ? 0 : t.size();
    final int lastIndex = fieldCount - 1;
    for (int idx = 0; idx < fieldCount; idx++) {
        Object value = t.get(idx);
        StorageUtil.putField(out, value, DELIMS, true);
        // no separator after the final field
        if (idx < lastIndex) {
            out.write(DELIMS.getParamDelim());
        }
    }
    byte[] recordEnd = DELIMS.getRecordEnd();
    out.write(recordEnd, 0, recordEnd.length);
    return out;
}
 
Example 3
Source File: DateExtractor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the first field of the tuple with {@code incomingDateFormat} and
 * re-formats the resulting date with {@code outgoingDateFormat}.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the re-formatted date, or {@code null} when the tuple is null or
 *         empty, the first field is null, or the value cannot be parsed
 * @throws IOException wrapping any other exception raised while processing
 */
@Override
public String exec(Tuple input) throws IOException {
  boolean hasValue = input != null && input.size() != 0 && input.get(0) != null;
  if (!hasValue) {
    return null;
  }
  // declared outside the try so the parse-failure message can report it
  String rawDate = "";
  try {
    rawDate = (String) input.get(0);
    Date parsed = incomingDateFormat.parse(rawDate);
    return outgoingDateFormat.format(parsed);
  } catch (ParseException pe) {
    System.err.println("piggybank.evaluation.util.apachelogparser.DateExtractor: unable to parse date "+rawDate);
    return null;
  } catch (Exception e) {
    throw new IOException("Caught exception processing input row ", e);
  }
}
 
Example 4
Source File: AliasableEvalFunc.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the field registered under {@code alias} from the tuple as a string.
 *
 * @param tuple the tuple to read from
 * @param alias the alias whose position is resolved through {@code getPosition}
 * @param defaultValue value returned when the field is null
 * @return the field value, or {@code defaultValue} when the field is null
 * @throws ExecException declared by the signature; {@code FieldNotFound} is
 *         thrown when the alias is unknown or its position lies outside the tuple
 */
public String getString(Tuple tuple, String alias, String defaultValue) throws ExecException {
  final Integer position = getPosition(alias);
  if (position == null) {
    throw new FieldNotFound("Attempt to reference unknown alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
  }
  if (position >= tuple.size()) {
    throw new FieldNotFound("Attempt to reference outside of tuple for alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
  }
  final String value = (String) tuple.get(position);
  return (value != null) ? value : defaultValue;
}
 
Example 5
Source File: FloatVAR.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Computes a variance from the combined (sum, sum of squares, count) tuple
 * produced by {@code combine} over the bag in the first input field.
 *
 * @param input tuple whose first field is cast to {@code DataBag}
 * @return mean of squares minus square of mean; {@code null} when the combined
 *         sum is null or the count is not positive
 * @throws IOException an {@code ExecException} is rethrown as-is; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag)input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double)combined.get(0);
        Double sumSquare = (Double)combined.get(1);
        if (sum == null) {
            return null;
        }
        Long count = (Long)combined.get(2);

        if (count <= 0) {
            return null;
        }
        // primitive arithmetic instead of the deprecated Double constructor
        double avg = sum / count;
        double avgSquare = sumSquare / count;
        return avgSquare - avg * avg;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing variance in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example 6
Source File: JsFunction.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Calls the JavaScript function {@code functionName} with the fields of the
 * Pig tuple and converts the JavaScript result back to a Pig value.
 *
 * @param tuple the Pig input tuple
 * @return the first field of the converted tuple when the output schema has
 *         exactly one field, otherwise the converted tuple itself
 * @throws IOException declared by the signature
 */
@Override
public Object exec(Tuple tuple) throws IOException {
    Schema argSchema = this.getInputSchema();
    if (LOG.isDebugEnabled()) {
        LOG.debug( "CALL " + stringify(outputSchema) + " " + functionName + " " + stringify(argSchema));
    }
    // A UDF always receives a tuple: when the schema is one tuple-typed field,
    // use that field's inner schema so the function sees the fields directly.
    boolean singleTupleArg = argSchema.size() == 1 && argSchema.getField(0).type == DataType.TUPLE;
    if (singleTupleArg) {
        argSchema = argSchema.getField(0).schema;
    }

    Scriptable jsParams = pigTupleToJS(tuple, argSchema, 0);

    // Pull the arguments out of the JS object in schema order, keyed by alias.
    int argCount = argSchema.size();
    Object[] args = new Object[argCount];
    for (int idx = 0; idx < argCount; idx++) {
        args[idx] = jsParams.get(argSchema.getField(idx).alias, jsParams);
    }

    Object jsResult = jsScriptEngine.jsCall(functionName, args);
    if (LOG.isDebugEnabled()) {
        LOG.debug( "call "+functionName+"("+Arrays.toString(args)+") => "+toString(jsResult));
    }

    // The result is wrapped in a fresh JS object when either
    //   1. it is not a NativeObject, or
    //   2. the first output field is a tuple type.
    boolean needsWrapper = !(jsResult instanceof NativeObject)
            || outputSchema.getField(0).type == DataType.TUPLE;
    if (needsWrapper) {
        Scriptable wrapper = jsScriptEngine.jsNewObject();
        wrapper.put(outputSchema.getField(0).alias, wrapper, jsResult);
        jsResult = wrapper;
    }
    Tuple converted = jsToPigTuple((Scriptable) jsResult, outputSchema, 0);
    Object eval;
    if (outputSchema.size() == 1) {
        eval = converted.get(0);
    } else {
        eval = converted;
    }
    LOG.debug(eval);
    return eval;
}
 
Example 7
Source File: BitwiseORAggregation.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * ORs the numeric value found at {@code inputColumnIndex} into {@code bitmap}.
 * A null field is skipped; any non-null field sets {@code nonNullValueSeen}.
 *
 * @param input the tuple to aggregate
 * @throws IOException if the field cannot be read from the tuple
 */
@Override
public void aggregate(Tuple input) throws IOException
{
    Object obj = input.get(inputColumnIndex);
    if (obj == null)
        return;

    nonNullValueSeen = true;

    // reuse the value already fetched instead of reading the tuple a second time
    long value = ((Number) obj).longValue();
    bitmap |= value;
}
 
Example 8
Source File: Over.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Advances {@code currentRow} by one and returns it divided by the number of
 * tuples held by the bag's iterator.
 *
 * @param input tuple whose first field is cast to {@code DataBag}; its
 *        iterator is cast to {@code OverBag.OverBagIterator}
 * @return the incremented row number divided by the iterator's tuple count
 * @throws IOException if the field cannot be read from the tuple
 */
@Override
public Double exec(Tuple input) throws IOException {
    DataBag inbag = (DataBag) input.get(0);
    OverBag.OverBagIterator iter = (OverBag.OverBagIterator) inbag.iterator();

    // increment first, then read the size, matching left-to-right evaluation
    double rowNumber = (double) ++currentRow;
    double totalRows = (double) iter.tuples.size();
    return rowNumber / totalRows;
}
 
Example 9
Source File: Concat.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates the string forms of the first {@code nargs} fields of the
 * tuple, skipping null fields.
 *
 * @param tuple the tuple whose fields are concatenated
 * @return the concatenated string; empty when every field is null
 * @throws ExecException if a field cannot be read from the tuple
 */
@Override
public Object eval(Tuple tuple) throws ExecException
{
    // StringBuilder avoids re-copying the accumulated text on every append
    StringBuilder str = new StringBuilder();
    for (int i = 0; i < nargs; i++)
    {
        Object field = tuple.get(i);
        if (field != null)
            str.append(field.toString());
    }
    return str.toString();
}
 
Example 10
Source File: POPartitionRearrangeTez.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output bag for one value: a 4-field tuple
 * (index, partition index, key, value) for each partition the key maps to.
 *
 * The key's entry in {@code reducerMap} supplies a starting partition index
 * ({@code first}) and a bound ({@code second}); {@code second + 1} tuples are
 * emitted, with the partition index wrapping to 0 on reaching
 * {@code totalReducers}. A key with no entry yields one tuple with partition
 * index -1.
 *
 * @param resLst results passed through to {@code constructLROutput}
 * @param value the value passed through to {@code constructLROutput}
 * @return a bag holding the generated tuples
 * @throws ExecException if a tuple field cannot be read or written
 */
protected DataBag constructPROutput(List<Result> resLst, Tuple value) throws ExecException{
    Tuple t = super.constructLROutput(resLst, null, value);

    //Construct key
    Object key = t.get(1);

    // Construct an output bag and feed in the tuples
    DataBag opBag = mBagFactory.newDefaultBag();

    // Put the index, key, and value in a tuple and return
    // first -> min, second -> max
    Pair <Integer, Integer> indexes = reducerMap.get(key);

    // For non skewed keys, we set the partition index to be -1
    if (indexes == null) {
        indexes = new Pair <Integer, Integer>(-1,0);
    }

    // primitive counters: boxed Integer counters are re-boxed on every increment
    for (int reducerIdx = indexes.first, cnt = 0; cnt <= indexes.second; reducerIdx++, cnt++) {
        if (reducerIdx >= totalReducers) {
            reducerIdx = 0;
        }
        Tuple opTuple = mTupleFactory.newTuple(4);
        opTuple.set(0, t.get(0));
        // set the partition index
        opTuple.set(1, Integer.valueOf(reducerIdx));
        opTuple.set(2, key);
        opTuple.set(3, t.get(2));

        opBag.add(opTuple);
    }

    return opBag;
}
 
Example 11
Source File: DaysBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the number of whole days between two {@code DateTime} fields,
 * computed as (first - second) in milliseconds divided by 86400000.
 *
 * @param input tuple whose first two fields are cast to {@code DateTime}
 * @return the day difference, or {@code null} when the tuple is null, has
 *         fewer than two fields, or either of the first two fields is null
 * @throws IOException if a field cannot be read from the tuple
 */
@Override
public Long exec(Tuple input) throws IOException
{
    boolean hasBothDates = input != null
            && input.size() >= 2
            && input.get(0) != null
            && input.get(1) != null;
    if (!hasBothDates) {
        return null;
    }

    DateTime startDate = (DateTime) input.get(0);
    DateTime endDate = (DateTime) input.get(1);

    // Larger date first
    // Subtraction may overflow
    long diffMillis = startDate.getMillis() - endDate.getMillis();
    return diffMillis / 86400000L;
}
 
Example 12
Source File: TestEvalPipeline2.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Writes 40 pseudo-random integers (fixed seed) to a temp file, orders them
 * descending with parallel 2, limits to 10, and checks that exactly 10 rows
 * come back in non-increasing order.
 */
@Test
public void testLimitAfterSortDesc() throws Exception{
    int LOOP_COUNT = 40;
    File tmpFile = Util.createTempFileDelOnExit("test", "txt");
    PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
    try {
        Random r = new Random(1);
        for(int i = 0; i < LOOP_COUNT; i++) {
            ps.println(r.nextInt(100));
        }
    } finally {
        // close even when writing fails so the file handle is not leaked
        ps.close();
    }

    pigServer.registerQuery("A = LOAD '"
            + Util.generateURI(tmpFile.toString(), pigServer
                    .getPigContext()) + "' AS (num:int);");
    pigServer.registerQuery("B = order A by num desc parallel 2;");
    pigServer.registerQuery("C = limit B 10;");
    Iterator<Tuple> iter = pigServer.openIterator("C");
    if(!iter.hasNext()) Assert.fail("No output found");
    int numIdentity = 0;
    int oldNum = Integer.MAX_VALUE;
    int newNum;
    while(iter.hasNext()){
        Tuple t = iter.next();
        newNum = (Integer)t.get(0);
        // each value must be no larger than the one before it
        Assert.assertTrue(newNum<=oldNum);
        oldNum = newNum;
        ++numIdentity;
    }
    Assert.assertEquals(10, numIdentity);
}
 
Example 13
Source File: FloatSignum.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Java-level API: applies {@link Math#signum(float)} to the first field of
 * the tuple.
 *
 * @param input tuple whose first field is cast to {@code Float}
 * @return the signum of the value, or {@code null} when the tuple is null or
 *         empty or its first field is null
 * @throws IOException wrapping any exception raised while processing the row
 */
@Override
public Float exec(Tuple input) throws IOException {
    boolean hasValue = input != null && input.size() != 0 && input.get(0) != null;
    if (!hasValue) {
        return null;
    }

    try {
        Float value = (Float) input.get(0);
        return Math.signum(value);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
 
Example 14
Source File: LongAvg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tuples in the bag whose first field is present and non-null.
 *
 * @param input tuple whose first field is cast to {@code DataBag}
 * @return the number of non-null, non-empty tuples with a non-null first field
 * @throws ExecException if a field cannot be read from a tuple
 */
static protected long count(Tuple input) throws ExecException {
    DataBag values = (DataBag)input.get(0);
    long cnt = 0;
    // typed iterator: no raw type and no per-element cast
    for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
        Tuple t = it.next();
        if (t != null && t.size() > 0 && t.get(0) != null)
            cnt++;
    }

    return cnt;
}
 
Example 15
Source File: TestFilterUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Filter predicate: reports whether the first field of the tuple, read as an
 * integer, is greater than 10.
 *
 * @param input tuple whose first field is cast to {@code Integer}
 * @return {@code true} when the value exceeds 10; {@code false} otherwise,
 *         including when reading the field raises an {@code ExecException}
 * @throws IOException declared by the signature
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    try {
        int col = (Integer) input.get(0);
        return col > 10;
    } catch (ExecException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        return false;
    }
}
 
Example 16
Source File: BagTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the full-outer bag-join script on one input row holding three bags and
 * compares both joined bags (fields 1 and 2 of the single "data2" row) with
 * the expected contents, after sorting.
 */
@Test
public void bagJoinFullOuterTest() throws Exception {
    PigTest test = createPigTestFromString(bagJoinFullOuterTest);

    writeLinesToFile("input",
            "1\t{(K1,A1),(K2,B1),(K3,C1)}\t{(K1,A2),(K2,B2),(K2,B22)}\t{(K1,A3),(K3,C3),(K4,D3)}");

    try {
        test.runScript();
    } catch (Exception e) {
        // print the failure before propagating it
        e.printStackTrace();
        throw e;
    }

    List<Tuple> rows = getLinesForAlias(test, "data2");
    assertEquals(rows.size(), 1);
    Tuple row = rows.get(0);
    DataBag firstJoin = (DataBag) row.get(1);
    DataBag secondJoin = (DataBag) row.get(2);

    String firstSchema = "{(bag1::k: chararray,bag1::v: chararray,bag2::k: chararray,bag2::v: chararray,bag3::k3: chararray,bag3::v3: chararray)}";
    String secondSchema = "{(bag1::k: chararray,bag1::v: chararray,bag3::k3: chararray,bag3::v3: chararray,bag2::k: chararray,bag2::v: chararray)}";
    String firstExpected = "{(K1,A1,K1,A2,K1,A3),(K2,B1,K2,B2,,),(K2,B1,K2,B22,,),(K3,C1,,,K3,C3),(,,,,K4,D3)}";
    String secondExpected = "{(K1,A1,K1,A3,K1,A2),(K2,B1,,,K2,B2),(K2,B1,,,K2,B22),(K3,C1,K3,C3,,),(,,K4,D3,,)}";

    // compare sorted bags because there is no guarantee on the order
    String firstActualText = getSortedBag(firstJoin).toString();
    String firstExpectedText = getSortedBag(firstExpected, firstSchema).toString();
    assertEquals(firstActualText, firstExpectedText);

    String secondActualText = getSortedBag(secondJoin).toString();
    String secondExpectedText = getSortedBag(secondExpected, secondSchema).toString();
    assertEquals(secondActualText, secondExpectedText);
}
 
Example 17
Source File: CubeDimensions.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the dimension columns of a tuple into the int array backing
 * {@code key} and returns that key.
 *
 * For each dimension, the value is read from the tuple at
 * {@code inputIndex[i]} and written at {@code dimensionOffsets[i]}:
 * BOOLEAN as 1 or 0, INT as its int value, LONG as two ints (upper 32 bits,
 * then lower 32 bits), STRING as its code from the dimension's dictionary
 * (a new key is added when the lookup returns -1).
 *
 * @param tuple the tuple to read dimension values from
 * @return the {@code key} field, updated in place
 * @throws ExecException if a field cannot be read from the tuple
 * @throws RuntimeException if a dimension value is null or its type is unsupported
 */
public DimensionKey extractDimensionKey(Tuple tuple) throws ExecException
{
    int[] array = key.getArray();
    for (int i = 0; i < inputIndex.length; i++)
    {
        Object dim = tuple.get(inputIndex[i]);
        if (dim == null)
            throw new RuntimeException("Dimension is null for tuple " + tuple);

        switch (dimensionTypes[i])
        {
        case BOOLEAN:
            array[dimensionOffsets[i]] = ((Boolean) dim) ? 1 : 0;
            break;
        case INT:
            array[dimensionOffsets[i]] = ((Number) dim).intValue();
            break;
        case LONG:
        {
            long val = ((Number) dim).longValue();
            array[dimensionOffsets[i]] = (int) (val >> 32); // upper 32 bits
            array[dimensionOffsets[i] + 1] = (int) val; // lower 32 bits
            break;
        }
        case STRING:
        {
            CodeDictionary dict = dictionaries[i];
            int code = dict.getCodeForKey((String) dim);
            if (code == -1)
                code = dict.addKey((String) dim);
            array[dimensionOffsets[i]] = code;
            break;
        }
        default:
            // BOOLEAN is a supported type, so the message lists it too
            throw new RuntimeException("Type of dimension is not BOOLEAN, INT, LONG or STRING for tuple "
                    + tuple + " at col " + i);
        }
    }

    return key;
}
 
Example 18
Source File: GetMilliSecond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the millisecond-of-second component of the {@code DateTime} in the
 * first field of the tuple.
 *
 * @param input tuple whose first field is cast to {@code DateTime}
 * @return the milliseconds of the second, or {@code null} when the tuple is
 *         null or empty or its first field is null
 * @throws IOException if the field cannot be read from the tuple
 */
@Override
public Integer exec(Tuple input) throws IOException {
    boolean missing = input == null || input.size() < 1 || input.get(0) == null;
    if (missing) {
        return null;
    }

    DateTime dateTime = (DateTime) input.get(0);
    return dateTime.getMillisOfSecond();
}
 
Example 19
Source File: IntVAR.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Folds a bag of partial (sum, sum of squares, count) tuples into a single
 * 3-field tuple.
 *
 * @param values bag whose tuples hold {@code Long} sum, sum of squares and
 *        count in fields 0, 1 and 2
 * @return a tuple of (sum, sum of squares, total count); the first two fields
 *         are null when every partial sum in the bag was null
 * @throws ExecException if a tuple field cannot be read or written
 */
static protected Tuple combine(DataBag values) throws ExecException{
    long sum = 0;
    long sumSquare = 0;
    long totalCount = 0;

    // combine is called from Intermediate and Final
    // In either case, Initial would have been called
    // before and would have sent in valid tuples
    // Hence we don't need to check if incoming bag
    // is empty

    Tuple output = mTupleFactory.newTuple(3);
    boolean sawNonNull = false;
    for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
        Tuple t = it.next();
        Long i = (Long)t.get(0);
        Long iSquare = (Long)t.get(1);
        Long count = (Long)t.get(2);

        // we count nulls in var as contributing 0
        // a departure from SQL for performance of
        // COUNT() which implemented by just inspecting
        // size of the bag
        if (i != null) {
            sawNonNull = true;
            sum += i;
            sumSquare += iSquare;
        }
        totalCount += count;
    }
    if(sawNonNull) {
        // Long.valueOf instead of the deprecated Long constructor
        output.set(0, Long.valueOf(sum));
        output.set(1, Long.valueOf(sumSquare));
    } else {
        output.set(0, null);
        output.set(1, null);
    }
    output.set(2, Long.valueOf(totalCount));
    return output;
}
 
Example 20
Source File: MedianFlattenOperator.java    From Cubert with Apache License 2.0 4 votes vote down vote up
/**
 * Flattens a tuple whose last column is a bag into one or two output tuples.
 *
 * Every column of {@code inTuple} except the last is copied unchanged. The
 * two fields of the bag's first tuple fill the last two output columns. When
 * the bag holds a second tuple, a second output tuple is built the same way
 * and stored in the {@code secondOutput} field; that field is left untouched
 * otherwise.
 *
 * @param inTuple input tuple whose last column is a bag of 2-field tuples
 * @return the output tuple built from the bag's first tuple
 * @throws ExecException if a tuple field cannot be read or written
 * @throws RuntimeException if the bag is empty
 */
private Tuple tupleFlatten(Tuple inTuple) throws ExecException
{
    int outputSchemaSize = schema.getNumColumns();
    Tuple outTuple = TupleFactory.getInstance().newTuple(outputSchemaSize);

    // last column of inTuple is bag
    for (int i = 0; i < inTuple.size() - 1; i++)
    {
        outTuple.set(i, inTuple.get(i));
    }

    // outputSchemaSize is 1 greater than inputSchemaSize, and tuple zero indexed, so
    // -2
    DataBag bag = (DataBag) inTuple.get(outputSchemaSize - 2);
    Iterator<Tuple> bagIterator = bag.iterator();

    // Check before calling next(): an iterator that throws on exhaustion
    // would otherwise never reach the intended error below.
    if (!bagIterator.hasNext())
    {
        throw new RuntimeException("Bag should not be empty");
    }

    Tuple firstTuple = bagIterator.next();

    // kept for iterators that signal exhaustion by returning null
    if (firstTuple == null)
    {
        throw new RuntimeException("Bag should not be empty");
    }

    outTuple.set(outputSchemaSize - 2, firstTuple.get(0));
    outTuple.set(outputSchemaSize - 1, firstTuple.get(1));

    if (bagIterator.hasNext())
    {
        Tuple secondTuple = bagIterator.next();
        secondOutput = TupleFactory.getInstance().newTuple(outputSchemaSize);

        // last column of inTuple is bag
        for (int i = 0; i < inTuple.size() - 1; i++)
        {
            secondOutput.set(i, inTuple.get(i));
        }

        secondOutput.set(outputSchemaSize - 2, secondTuple.get(0));
        secondOutput.set(outputSchemaSize - 1, secondTuple.get(1));
    }

    return outTuple;
}