org.apache.pig.backend.executionengine.ExecException Java Examples

The following examples show how to use org.apache.pig.backend.executionengine.ExecException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StringMax.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Folds one more batch of input into the running maximum kept in
 * {@code intermediateMax}.
 *
 * <p>The batch maximum comes from {@code max(b)}; a {@code null} batch maximum
 * leaves the running value untouched.
 *
 * @param b the tuple handed to {@code max} for this batch
 * @throws IOException an {@code ExecException} raised while computing the batch
 *         maximum is rethrown unchanged; any other exception is wrapped in an
 *         {@code ExecException} with error code 2106
 */
@Override
public void accumulate(Tuple b) throws IOException {
    try {
        String curMax = max(b);
        if (curMax == null) {
            return;
        }
        // Replace the running value only when curMax lexicographically follows it.
        // (A "> 0" test here would keep the smaller string, i.e. compute a minimum.)
        if (intermediateMax == null || intermediateMax.compareTo(curMax) < 0) {
            intermediateMax = curMax;
        }
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing max in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #2
Source File: BigIntegerAvg.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the average of the input as {@code sum(input)} divided by
 * {@code count(input)}.
 *
 * @param input the tuple passed on to {@code sum} and {@code count}
 * @return the quotient produced by {@code div}, or {@code null} when the sum is
 *         {@code null} or the count is not positive
 * @throws IOException if one of the helpers raises an {@code ExecException}
 */
@Override
public BigDecimal exec(Tuple input) throws IOException {
    try {
        final BigInteger total = sum(input);
        if (total == null) {
            // Nothing to average: the bag was empty or held only nulls.
            return null;
        }
        final BigInteger n = count(input);
        // signum() > 0 is the same test as compareTo(BigInteger.ZERO) > 0.
        return (n.signum() > 0) ? div(total, n) : null;
    } catch (ExecException ee) {
        throw ee;
    }
}
 
Example #3
Source File: FloatAvg.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the final average from the partial results found in the first field
 * of {@code input}.
 *
 * <p>{@code combine} is expected to return a tuple whose field 0 is the sum
 * (a {@code Double}) and whose field 1 is the count (a {@code Long}).
 *
 * @param input tuple whose first field is the {@code DataBag} to combine
 * @return sum divided by count, or {@code null} when the sum is {@code null}
 *         or the count is not positive
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Double exec(Tuple input) throws IOException {
    try {
        DataBag b = (DataBag) input.get(0);
        Tuple combined = combine(b);

        Double sum = (Double) combined.get(0);
        if (sum == null) {
            return null;
        }
        // Unboxes the Long count and widens it so the division is done in double.
        double count = (Long) combined.get(1);

        if (count > 0) {
            // Double.valueOf replaces the deprecated new Double(double) constructor.
            return Double.valueOf(sum / count);
        }
        return null;
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing average in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #4
Source File: WeightedReservoirSample.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Draws a random score for a sample from the weight stored at position
 * {@code weightIdx}: {@code Math.random()} raised to the power {@code 1/weight}.
 *
 * @param sample tuple holding a numeric weight at position {@code weightIdx}
 * @return the generated score
 * @throws ExecException if {@code weightIdx} is outside the tuple, or the weight
 *         is null, NaN, zero or negative
 */
@Override
public double generateScore(Tuple sample) throws ExecException
{
    if (this.weightIdx >= sample.size())
    {
        throw new ExecException(String.format("Weight index %d is outside tuple bounds", this.weightIdx));
    }
    // Read the field once rather than fetching it again for the cast.
    Object rawWeight = sample.get(this.weightIdx);
    if (rawWeight == null)
    {
        throw new ExecException(String.format("null value for weight at index %d",this.weightIdx));
    }
    double weight = ((Number) rawWeight).doubleValue();
    // Double.compare orders NaN above every other value, so NaN must be rejected
    // explicitly; otherwise it would slip through and produce a NaN score.
    if (Double.isNaN(weight) || Double.compare(weight, 0.0) <= 0)
    {
        //non-positive weight should be avoided
        throw new ExecException(String.format("Invalid sample weight [%f]. It should be a positive real number", weight));
    }
    //a different approach to try: u^(1/w) could be exp(log(u)/w) ?
    return Math.pow(Math.random(), 1/weight);
}
 
Example #5
Source File: BinInterSedes.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a tuple from the stream, choosing the decoder from the type marker.
 *
 * @param in stream to read the tuple from
 * @param type type marker selecting how the tuple is decoded
 * @return the tuple that was read
 * @throws IOException if reading fails; an {@code ExecException} is thrown when
 *         {@code type} is not one of the tuple markers handled here
 */
public Tuple readTuple(DataInput in, byte type) throws IOException {
    switch (type) {
        // Generic tuple encodings are delegated to SedesHelper.
        case TUPLE_0:
        case TUPLE_1:
        case TUPLE_2:
        case TUPLE_3:
        case TUPLE_4:
        case TUPLE_5:
        case TUPLE_6:
        case TUPLE_7:
        case TUPLE_8:
        case TUPLE_9:
        case TUPLE:
        case TINYTUPLE:
        case SMALLTUPLE:
            return SedesHelper.readGenericTuple(in, type);

        // Schema tuple encodings have their own reader.
        case SCHEMA_TUPLE_BYTE_INDEX:
        case SCHEMA_TUPLE_SHORT_INDEX:
        case SCHEMA_TUPLE:
            return readSchemaTuple(in, type);

        default:
            throw new ExecException("Unknown Tuple type found in stream: " + type);
    }
}
 
Example #6
Source File: AugmentBaseDataVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether {@code input} already contains a tuple that equals
 * {@code newTuple} on the first {@code schema.size()} fields.
 *
 * @param newTuple tuple to look for
 * @param input bag to search
 * @param schema schema whose size bounds the number of compared fields
 * @return {@code true} as soon as one tuple matches on every compared field
 * @throws ExecException if a field cannot be read from a tuple
 */
private boolean inInput(Tuple newTuple, DataBag input, LogicalSchema schema) throws ExecException {
    for (Tuple candidate : input) {
        boolean allFieldsMatch = true;
        int field = 0;
        // Stop at the first differing field; the size is not re-read after a mismatch.
        while (allFieldsMatch && field < schema.size()) {
            allFieldsMatch = newTuple.get(field).equals(candidate.get(field));
            field++;
        }
        if (allFieldsMatch) {
            return true;
        }
    }
    return false;
}
 
Example #7
Source File: TestNotEqualTo.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a not-equal comparison of two constant tuples holding different
 * values returns STATUS_OK with a true result.
 */
@Test
public void testTupleNe() throws ExecException {
    // Two single-field tuples with different contents.
    Tuple leftTuple = TupleFactory.getInstance().newTuple("item_1");
    Tuple rightTuple = TupleFactory.getInstance().newTuple("item_2");

    ConstantExpression leftConst = GenPhyOp.exprConst();
    leftConst.setValue(leftTuple);
    ConstantExpression rightConst = GenPhyOp.exprConst();
    rightConst.setValue(rightTuple);

    NotEqualToExpr notEqual = GenPhyOp.compNotEqualToExpr();
    notEqual.setLhs(leftConst);
    notEqual.setRhs(rightConst);
    notEqual.setOperandType(DataType.TUPLE);

    Result outcome = notEqual.getNextBoolean();
    assertEquals(POStatus.STATUS_OK, outcome.returnStatus);
    assertTrue((Boolean) outcome.result);
}
 
Example #8
Source File: TestTruncateDateTime.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code ISOHelper.parseDateTime} interprets a zone-less timestamp
 * in the default time zone and leaves that default unchanged.
 *
 * <p>The default zone is overridden for the duration of the test and restored
 * in a {@code finally} block, so a failing assertion cannot leak the override
 * into later tests.
 */
@Test
public void testParseDateTime_defaultTimeZonePreserved() throws ExecException {

    // Remember pre-test default time zone.
    DateTimeZone previousDefaultTimeZone = DateTimeZone.getDefault();

    // Overwrite default time zone for this test.
    DateTimeZone testDefaultDateTimeZone = DateTimeZone.forOffsetHours(-8);
    DateTimeZone.setDefault(testDefaultDateTimeZone);

    try {
        Tuple t1 = TupleFactory.getInstance().newTuple(1);
        t1.set(0, "2010-04-15T08:11:33.020");

        assertEquals(new DateTime(2010, 4, 15, 8, 11, 33, 20, testDefaultDateTimeZone), ISOHelper.parseDateTime(t1));

        // Parsing must not have changed the default zone.
        assertEquals(testDefaultDateTimeZone, DateTimeZone.getDefault());
    } finally {
        // Restore pre-test default time zone even when an assertion fails.
        DateTimeZone.setDefault(previousDefaultTimeZone);
    }
}
 
Example #9
Source File: TestBestFitCast.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Best-fit cast test for arguments (float, long, bytearray): the UDF result is
 * expected to carry (Float, Long, Double) fields.
 */
@Test
public void testByteArrayCast10() throws IOException, ExecException {
    // Passing (float, long, bytearray)
    // Possible matches: (float, long, double) , (float, double, long)
    // Chooses (float, long, double) since that is the only exact match without bytearray
    pigServer.registerQuery("A = LOAD '" + inputFile2 + "' as (x:float, y:long, z);");
    pigServer.registerQuery("B = FOREACH A generate x, " + UDF3.class.getName() + "(x,y,z);");
    Iterator<Tuple> iter = pigServer.openIterator("B");
    assertTrue("No Output received", iter.hasNext());
    int cnt = 0;
    while (iter.hasNext()) {
        // Field 1 of every output row is the tuple returned by the UDF.
        Tuple udfResult = (Tuple) iter.next().get(1);
        // assertEquals takes (expected, actual); valueOf replaces the deprecated boxing constructors.
        assertTrue(udfResult.get(0) instanceof Float);
        assertEquals(Float.valueOf(0.0f), udfResult.get(0));
        assertTrue(udfResult.get(1) instanceof Long);
        assertEquals(Long.valueOf(cnt + 1), udfResult.get(1));
        assertTrue(udfResult.get(2) instanceof Double);
        assertEquals(Double.valueOf(cnt + 1), udfResult.get(2));
        ++cnt;
    }
    assertEquals(LOOP_SIZE, cnt);
}
 
Example #10
Source File: TestLocalRearrange.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Configures {@code lr} with two plans and a single projection input that is
 * fed a tuple wrapping {@code db}.
 */
private void setUp2() throws PlanException, ExecException{
    // Primary operator, plus a second operator whose first plan is borrowed.
    lr = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0,0,db.iterator().next());
    List<PhysicalPlan> combinedPlans = lr.getPlans();
    POLocalRearrange donor = GenPhyOp.topLocalRearrangeOPWithPlanPlain(0, 1, db.iterator().next());
    List<PhysicalPlan> donorPlans = donor.getPlans();
    combinedPlans.add(donorPlans.get(0));
    lr.setPlans(combinedPlans);

    // Projection of column 0, attached to a tuple whose only field is the bag.
    POProject projection = GenPhyOp.exprProject();
    projection.setColumn(0);
    projection.setResultType(DataType.TUPLE);
    projection.setOverloaded(true);
    Tuple wrapper = new DefaultTuple();
    wrapper.append(db);
    projection.attachInput(wrapper);

    List<PhysicalOperator> operatorInputs = new ArrayList<PhysicalOperator>();
    operatorInputs.add(projection);
    lr.setInputs(operatorInputs);
}
 
Example #11
Source File: PODemux.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the inner plan addressed by the index carried in the first field of
 * {@code res}, attaches {@code res} to it and returns that plan's first leaf.
 *
 * @param res tuple whose first field is a {@code PigNullableWritable}
 * @return the first leaf of the selected inner plan
 * @throws ExecException if a tuple field cannot be read or written
 */
private PhysicalOperator attachInputWithIndex(Tuple res) throws ExecException {
    // The first field wraps both the value expected by the inner plans and
    // the index of the inner plan it belongs to.
    PigNullableWritable wrapped = (PigNullableWritable) res.get(0);

    // The index is set by the POLocalRearrange operator and passed to this
    // operator by POMultiQueryPackage; it is masked with idxPart before use.
    int planIndex = wrapped.getIndex();
    planIndex &= idxPart;
    curPlan = myPlans.get(planIndex);

    // Unwrap the value unless the selected plan starts with another PODemux.
    boolean startsWithDemux = curPlan.getRoots().get(0) instanceof PODemux;
    if (!startsWithDemux) {
        res.set(0, wrapped.getValueAsPigType());
    }

    curPlan.attachInput(res);
    return curPlan.getLeaves().get(0);
}
 
Example #12
Source File: TestBestFitCast.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Best-fit cast test for arguments (float, bytearray, long): the UDF result is
 * expected to carry (Float, Double, Long) fields.
 */
@Test
public void testByteArrayCast11() throws IOException, ExecException {
    // Passing (float, bytearray, long)
    // Possible matches: (float, long, double) , (float, double, long)
    // Chooses (float, double, long) since that is the only exact match without bytearray
    pigServer.registerQuery("A = LOAD '" + inputFile2 + "' as (x:float, y, z:long);");
    pigServer.registerQuery("B = FOREACH A generate x, " + UDF3.class.getName() + "(x,y,z);");
    Iterator<Tuple> iter = pigServer.openIterator("B");
    assertTrue("No Output received", iter.hasNext());
    int cnt = 0;
    while (iter.hasNext()) {
        // Field 1 of every output row is the tuple returned by the UDF.
        Tuple udfResult = (Tuple) iter.next().get(1);
        // assertEquals takes (expected, actual); valueOf replaces the deprecated boxing constructors.
        assertTrue(udfResult.get(0) instanceof Float);
        assertEquals(Float.valueOf(0.0f), udfResult.get(0));
        assertTrue(udfResult.get(1) instanceof Double);
        assertEquals(Double.valueOf(cnt + 1), udfResult.get(1));
        assertTrue(udfResult.get(2) instanceof Long);
        assertEquals(Long.valueOf(cnt + 1), udfResult.get(2));
        ++cnt;
    }
    assertEquals(LOOP_SIZE, cnt);
}
 
Example #13
Source File: StringMin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the first value of the input bag in a new single-field tuple.
 *
 * @param input tuple whose first field is a {@code DataBag}; the first field of
 *        the bag's first tuple is read as a {@code String}
 * @return a tuple created from that string, or from {@code null} when the bag
 *         is empty
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        // input is a bag with one tuple containing
        // the column we are trying to min on
        DataBag bg = (DataBag) input.get(0);
        String s = null;
        // Iterate once and stop after the first tuple, instead of creating one
        // iterator for hasNext() and a second one for next().
        for (Tuple tp : bg) {
            s = (String) tp.get(0);
            break;
        }
        return tfact.newTuple(s);
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        int errCode = 2106;
        String msg = "Error while computing min in " + this.getClass().getSimpleName();
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #14
Source File: AppendableSchemaTuple.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Compares this tuple with another schema tuple: first through the
 * class-specific or inherited comparison, then field by field over the
 * appended fields.
 *
 * @param t tuple to compare against
 * @param checkType when true and both objects have the same class, the
 *        comparison is delegated to {@code compareToSpecific}
 * @return the first non-zero comparison result, or 0 when nothing differs
 */
protected int compareTo(SchemaTuple<?> t, boolean checkType) {
    if (checkType && getClass() == t.getClass()) {
        return compareToSpecific((T)t);
    }

    int outcome = super.compareTo(t, false);
    // Done when the inherited comparison already decided, or nothing was appended.
    if (outcome != 0 || appendedFieldsSize() <= 0) {
        return outcome;
    }

    // Appended field k of this tuple is compared with field schemaSize() + k of the other.
    int otherIndex = schemaSize();
    for (int k = 0; k < size() - schemaSize(); k++, otherIndex++) {
        int fieldOutcome;
        try {
            fieldOutcome = DataType.compare(getAppendedField(k), t.get(otherIndex));
        } catch (ExecException e) {
            throw new RuntimeException("Unable to get append value", e);
        }
        if (fieldOutcome != 0) {
            return fieldOutcome;
        }
    }
    return 0;
}
 
Example #15
Source File: TestPOBinCond.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the bin-cond operator over a bag of integers and expects 1 for inputs
 * equal to 1 and 0 otherwise.
 */
@Test
public void testPOBinCondWithInteger() throws  ExecException, PlanException {

    bag = getBag(DataType.INTEGER);
    // Integer.valueOf replaces the deprecated new Integer(int) constructor.
    TestPoBinCondHelper testHelper = new TestPoBinCondHelper(DataType.INTEGER, Integer.valueOf(1));

    for (Tuple t : bag) {
        testHelper.getPlan().attachInput(t);
        Integer value = (Integer) t.get(0);
        int expected = (value.intValue() == 1) ? 1 : 0;
        Integer result = (Integer) testHelper.getOperator().getNextInteger().result;
        int actual = result.intValue();
        assertEquals(expected, actual);
    }

}
 
Example #16
Source File: JsFunction.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a bag into a javascript array, turning each tuple into one array
 * element according to the given schema.
 * @param bag the bag to convert
 * @param schema the schema to use for conversion
 * @param depth call depth used for debugging messages
 * @return the resulting javascript object
 * @throws FrontendException
 * @throws ExecException
 */
private Scriptable pigBagToJS(DataBag bag, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertPigToJS(depth, "Bag", bag, schema);

    // Bags always contain a tuple, so a schema made of a single tuple field is unwrapped.
    boolean singleTupleField = schema.size() == 1 && schema.getField(0).type == DataType.TUPLE;
    if (singleTupleField) {
        schema = schema.getField(0).schema;
    }

    Scriptable jsArray = jsScriptEngine.jsNewArray(bag.size());
    jsArray.setParentScope(jsScriptEngine.getScope());

    int position = 0;
    for (Tuple element : bag) {
        jsArray.put(position, jsArray, pigTupleToJS(element, schema, depth + 1));
        position++;
    }

    debugReturn(depth, jsArray);
    return jsArray;
}
 
Example #17
Source File: SerializedTupleStore.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Records an offset for a tuple.
 *
 * <p>When {@code getInnerOffsetList} already has a list for the tuple, the
 * offset is appended to it. Otherwise a new single-element list is stored in
 * {@code tupleMap}, under the hash code of {@code oneKey} (after setting it to
 * the tuple) and then under the offset itself.
 *
 * <p>An {@code ExecException} is printed to standard error and otherwise ignored.
 *
 * @param mytuple tuple whose offset is being recorded
 * @param offset offset to record
 */
private void putTupleAndOffset(Tuple mytuple, Integer offset)
{
    try
    {
        List<Integer> existingOffsets = getInnerOffsetList(mytuple);
        if (existingOffsets != null)
        {
            existingOffsets.add(offset);
            return;
        }

        oneKey.set(mytuple);
        int key = oneKey.hashCode();

        // Create the per-hash table on first use.
        HashMap<Integer, ArrayList<Integer>> bucket = tupleMap.get(key);
        if (bucket == null)
        {
            tupleMap.put(key, new HashMap<Integer, ArrayList<Integer>>());
            bucket = tupleMap.get(key);
        }

        ArrayList<Integer> freshOffsets = new ArrayList<Integer>();
        freshOffsets.add(offset);
        bucket.put(offset, freshOffsets);
    }
    catch (ExecException e)
    {
        e.printStackTrace();
    }
}
 
Example #18
Source File: IntAvg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tuples of the input bag whose first field is present.
 *
 * @param input tuple whose first field is the {@code DataBag} to scan
 * @return number of tuples that are non-null, non-empty and have a non-null
 *         first field
 * @throws ExecException if a field cannot be read
 */
static protected long count(Tuple input) throws ExecException {
    DataBag values = (DataBag) input.get(0);
    long counted = 0;
    for (Tuple row : values) {
        // Skip null tuples, empty tuples and tuples whose first field is null.
        if (row == null || row.size() <= 0 || row.get(0) == null) {
            continue;
        }
        counted++;
    }
    return counted;
}
 
Example #19
Source File: TestExampleGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requests example data for a filter on "x is not null" and checks that a
 * result map is returned.
 */
@Test
public void testFilterWithIsNull() throws ExecException, IOException {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A + " using PigStorage() as (x : int, y : int);";
    server.registerQuery(loadStatement);
    server.registerQuery("B = filter A by x is not null;");

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
 
Example #20
Source File: TestConstExpr.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the constant expression returns the Float it was given, and
 * null when its value is null.
 */
@Test
public void testGetNextFloat() throws ExecException {
    // Random float value round-trips unchanged.
    Float expected = r.nextFloat();
    ce.setValue(expected);
    Result outcome = ce.getNextFloat();
    Float actual = (Float) outcome.result;
    assertEquals(expected, actual);

    // A null value yields a null result.
    ce.setValue(null);
    outcome = ce.getNextFloat();
    actual = (Float) outcome.result;
    assertNull(actual);
}
 
Example #21
Source File: DataGenerator.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a list of {@code N} tuples, each produced by
 * {@code createRandomTuple(schema)}.
 *
 * @param N number of tuples to generate; nothing is generated when it is not positive
 * @param schema schema handed to {@code createRandomTuple}
 * @return the generated tuples, in creation order
 * @throws ExecException if creating a tuple fails
 */
public List<Tuple> generateRandomTuples(final int N, final BlockSchema schema) throws ExecException
{
    final List<Tuple> generated = new ArrayList<Tuple>();

    int produced = 0;
    while (produced < N)
    {
        generated.add(createRandomTuple(schema));
        produced++;
    }
    return generated;
}
 
Example #22
Source File: POPartialAgg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the key-value pair to every aggregator plan and gathers the results.
 * @param key stored as the first field of the output tuple
 * @param value attached as input to each value plan
 * @return Result with STATUS_OK and a tuple of form
 *         (key, tupleReturnedByPlan1, tupleReturnedByPlan2, ...); if a plan
 *         reports STATUS_ERR, that plan's Result is returned instead
 * @throws ExecException
 */
private Result getOutput(Object key, Tuple value) throws ExecException {
    Tuple row = TF.newTuple(valuePlans.size() + 1);
    row.set(0, key);

    // Slot 0 holds the key, so plan p writes into slot p + 1.
    for (int slot = 1; slot <= valuePlans.size(); slot++) {
        int plan = slot - 1;
        valuePlans.get(plan).attachInput(value);
        Result planResult = getResult(valueLeaves.get(plan));
        if (planResult.returnStatus == POStatus.STATUS_ERR) {
            return planResult;
        }
        row.set(slot, planResult.result);
    }
    return new Result(POStatus.STATUS_OK, row);
}
 
Example #23
Source File: Utils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives a schema from the first record that the load function reads from
 * the given location.
 *
 * @param wrappedLoadFunc load function used to read the first record
 * @param location input location to read from
 * @param checkExistence when true, a missing path yields {@code null} instead
 *        of an attempt to read
 * @param job job supplying the configuration
 * @return the schema built from the fields of the first record, or
 *         {@code null} when the path is missing or no record could be read
 * @throws IOException if reading fails; a failure to determine a field schema
 *         is reported as an {@code ExecException} with error code 2104
 */
public static ResourceSchema getSchema(LoadFunc wrappedLoadFunc, String location, boolean checkExistence, Job job)
        throws IOException {
    Configuration conf = job.getConfiguration();
    if (checkExistence) {
        Path inputPath = new Path(location);
        boolean present = FileSystem.get(conf).exists(inputPath);
        if (!present) {
            // At compile time in batch mode the file may not exist yet (such as
            // an intermediate file); treat it like an input without a valid record.
            return null;
        }
    }

    // The schema is figured out from the data in the first record of the input.
    ReadToEndLoader loader = new ReadToEndLoader(wrappedLoadFunc, conf, location, 0);
    Tuple firstRecord = loader.getNext();
    if (firstRecord == null) {
        // No valid record could be read from the input.
        return null;
    }

    Schema derived = new Schema();
    int fieldCount = firstRecord.size();
    int field = 0;
    while (field < fieldCount) {
        try {
            derived.add(DataType.determineFieldSchema(firstRecord.get(field)));
        } catch (Exception e) {
            int errCode = 2104;
            String msg = "Error while determining schema of SequenceFileStorage data.";
            throw new ExecException(msg, errCode, PigException.BUG, e);
        }
        field++;
    }
    return new ResourceSchema(derived);
}
 
Example #24
Source File: PigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers every jar named by the "pig.additional.jars" and
 * "pig.additional.jars.uris" properties.
 *
 * <p>The first property is separated by {@code File.pathSeparator}
 * (":" on Linux, ";" on Windows), matching how path aggregates are represented
 * on the operating system; the second one is comma separated.
 *
 * @throws ExecException with error code 4010 when registering a jar fails
 *         with an {@code IOException}
 */
private void addJarsFromProperties() throws ExecException {
    // Normalise the path-separated list to commas.
    String pathSeparated = pigContext.getProperties().getProperty("pig.additional.jars");
    String combined = (pathSeparated == null) ? "" : pathSeparated;
    combined = combined.replaceAll(File.pathSeparator, ",");
    if (!combined.isEmpty()) {
        combined = combined + ",";
    }

    // Append the list that is already comma separated.
    String commaSeparated = pigContext.getProperties().getProperty("pig.additional.jars.uris");
    if (commaSeparated != null && !commaSeparated.isEmpty()) {
        combined = combined + commaSeparated;
    }

    if (combined.isEmpty()) {
        return;
    }

    for (String jar : combined.split(",")) {
        try {
            registerJar(jar);
        } catch (IOException e) {
            int errCode = 4010;
            String msg = "Failed to register jar :" + jar + ". Caught exception.";
            throw new ExecException(msg, errCode, PigException.USER_ENVIRONMENT, e);
        }
    }
}
 
Example #25
Source File: TestProject.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Projects every column of the random tuple in turn and checks that each
 * projection succeeds and returns that column's value.
 */
@Test
public void testGetNext() throws ExecException, IOException {
    t = tRandom;
    proj.attachInput(t);

    int column = 0;
    while (column < t.size()) {
        // The input is attached again before each projection.
        proj.attachInput(t);
        proj.setColumn(column);

        res = proj.getNext();
        assertEquals(POStatus.STATUS_OK, res.returnStatus);
        assertEquals(t.get(column), res.result);
        column++;
    }
}
 
Example #26
Source File: COUNT.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the value of {@code sum(input)} in a new tuple.
 *
 * @param input tuple handed to {@code sum}
 * @return a tuple created from the value returned by {@code sum}
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        return mTupleFactory.newTuple(sum(input));
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        throw new ExecException(
                "Error while computing count in " + this.getClass().getSimpleName(),
                2106,
                PigException.BUG,
                e);
    }
}
 
Example #27
Source File: TestPOPartialAgg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the single-row check on a tuple whose second field is a tuple holding
 * a null value.
 */
@Test
public void testPartialOneInput3() throws ExecException, ParserException {
    // input tuple has key, and bag containing SUM.Init output
    String[] constantRows = { "(1,(null))" };
    Tuple onlyRow = Util.getTuplesFromConstantTupleStrings(constantRows).get(0);
    checkSingleRow(onlyRow);
}
 
Example #28
Source File: COUNT_STAR.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the size of the bag held in the first field of the input.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return the value of {@code size()} on that bag
 * @throws IOException an {@code ExecException} is rethrown unchanged; any other
 *         exception is wrapped in an {@code ExecException} with error code 2106
 */
@Override
public Long exec(Tuple input) throws IOException {
    try {
        return Long.valueOf(((DataBag) input.get(0)).size());
    } catch (ExecException ee) {
        throw ee;
    } catch (Exception e) {
        throw new ExecException(
                "Error while computing count in " + this.getClass().getSimpleName(),
                2106,
                PigException.BUG,
                e);
    }
}
 
Example #29
Source File: POCastDummy.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Pulls the next value from the first input and, when it is a
 * {@code DataByteArray}, replaces it with its string form.
 *
 * @return the input's Result unchanged when its status is not STATUS_OK; the
 *         same Result with a String payload when the value was a
 *         {@code DataByteArray}; otherwise a fresh {@code Result}
 * @throws ExecException if the input operator fails
 */
@Override
public Result getNextString() throws ExecException {
    Result res = inputs.get(0).getNextString();

    if (res.returnStatus == POStatus.STATUS_OK) {
        if (!(res.result instanceof DataByteArray)) {
            // Any other payload type is answered with a fresh Result.
            return new Result();
        }
        res.result = ((DataByteArray) res.result).toString();
    }
    return res;
}
 
Example #30
Source File: TestPOBinCond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the bin-cond operator over a bag of longs and expects 1 for inputs
 * equal to 1 and 0 otherwise.
 */
@Test
public void testPOBinCondWithLong() throws  ExecException, PlanException {
    bag = getBag(DataType.LONG);
    // Long.valueOf replaces the deprecated new Long(long) constructor.
    TestPoBinCondHelper testHelper = new TestPoBinCondHelper(DataType.LONG, Long.valueOf(1L));

    for (Tuple t : bag) {
        testHelper.getPlan().attachInput(t);
        Long value = (Long) t.get(0);
        int expected = (value.longValue() == 1L) ? 1 : 0;
        Integer result = (Integer) testHelper.getOperator().getNextInteger().result;
        int actual = result.intValue();
        assertEquals(expected, actual);
    }
}