Java Code Examples for org.apache.pig.data.Tuple#size()

The following examples show how to use org.apache.pig.data.Tuple#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestXMLLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the {@code nestedTags} fixture through XMLLoader using the
 * {@code event} tag and expects exactly three non-empty tuples.
 *
 * @throws Exception if the fixture cannot be created or the query fails
 */
public void testXMLLoaderShouldSupportNestedTagWithSameName() throws Exception {
   // Backslashes are doubled before the path is embedded in the query text
   // (presumably so Windows-style paths survive Pig's string parsing).
   String path = TestHelper.createTempFile(nestedTags, "");
   PigServer pig = new PigServer(LOCAL);
   path = path.replace("\\", "\\\\");
   pig.registerQuery("A = LOAD '" + path + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);");

   int nonEmptyTuples = 0;
   for (Iterator<?> rows = pig.openIterator("A"); rows.hasNext(); ) {
       Tuple row = (Tuple) rows.next();
       if (row == null) {
           // A null row ends the scan early.
           break;
       }
       if (row.size() > 0) {
           nonEmptyTuples++;
       }
   }
   assertEquals(3, nonEmptyTuples);
}
 
Example 2
Source File: SetUnion.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the union of all bags passed in the input tuple.
 *
 * @param input tuple whose fields must all be {@link DataBag} instances
 * @return a distinct bag holding every tuple found in any of the input bags
 * @throws IOException wrapping any failure, including a field that is not a bag
 */
@Override
public DataBag exec(Tuple input) throws IOException
{
  DataBag union = bagFactory.newDistinctBag();

  try {
    int idx = 0;
    while (idx < input.size()) {
      Object field = input.get(idx);
      if (field instanceof DataBag) {
        for (Tuple member : (DataBag) field) {
          union.add(member);
        }
      } else {
        throw new RuntimeException("parameters must be databags");
      }
      idx++;
    }

    return union;
  }
  catch (Exception cause) {
    // Every failure, including the RuntimeException above, surfaces as IOException.
    throw new IOException(cause);
  }
}
 
Example 3
Source File: VespaDocumentOperation.java    From vespa with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether a tuple start should be written for the given field.
 * <p>
 * Returns {@code false} only when the tuple has at most one field and the
 * {@code SIMPLE_ARRAY_FIELDS} property is either {@code "*"} or a
 * comma-separated list containing {@code name} (entries are trimmed and
 * compared case-insensitively). In every other case the result is {@code true}.
 *
 * @param tuple      the tuple about to be written
 * @param name       the field name to look up in the property
 * @param properties configuration, may be {@code null}
 * @return whether the tuple start should be written
 */
private static boolean shouldWriteTupleStart(Tuple tuple, String name, Properties properties) {
    if (tuple.size() > 1 || properties == null) {
        return true;
    }

    final String configured = properties.getProperty(SIMPLE_ARRAY_FIELDS);
    if (configured == null) {
        return true;
    }
    if (configured.equals("*")) {
        // Wildcard: every field is treated as a simple array.
        return false;
    }

    final String[] candidates = configured.split(",");
    for (int i = 0; i < candidates.length; i++) {
        if (candidates[i].trim().equalsIgnoreCase(name)) {
            return false;
        }
    }
    return true;
}
 
Example 4
Source File: BuildBloom.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new bloom filter, adds a key derived from the first tuple of
 * the input bag, and returns the result of {@code bloomOut()} wrapped in
 * a tuple.
 *
 * @param input tuple whose first field is a {@link DataBag}
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when the
 *         input is {@code null} or has no fields
 * @throws IOException if reading or serializing the input fails
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Unwrap the outer bag; only its first tuple is read here.
    // NOTE(review): an empty bag would make next() fail - confirm callers never pass one.
    DataBag bag = (DataBag) input.get(0);
    Tuple first = bag.iterator().next();

    // A one-field tuple is keyed on that field alone; otherwise the
    // whole tuple is serialized into the key.
    byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    Key key = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
 
Example 5
Source File: TransposeTupleToBag.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Transposes a tuple into a bag with one two-field tuple per input field:
 * the alias registered for that position, followed by the field's value.
 *
 * @param input the tuple to transpose
 * @return a bag of (alias, value) tuples in field order; the alias is
 *         {@code null} for positions that have no registered alias
 * @throws IOException if a field cannot be read from the input
 */
@Override
public DataBag exec(Tuple input) throws IOException
{
  // Invert alias -> position into position -> alias.
  HashMap<Integer, String> aliasByPosition = new HashMap<Integer, String>();
  for (String name : getFieldAliases().keySet()) {
    aliasByPosition.put(getFieldAliases().get(name), name);
  }

  DataBag transposed = BagFactory.getInstance().newDefaultBag();
  int fieldIndex = 0;
  while (fieldIndex < input.size()) {
    Tuple pair = TupleFactory.getInstance().newTuple();
    pair.append(aliasByPosition.get(fieldIndex));
    pair.append(input.get(fieldIndex));
    transposed.add(pair);
    fieldIndex++;
  }
  return transposed;
}
 
Example 6
Source File: ROUND.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Java-level API: rounds the first field of the input to the closest long.
 *
 * @param input expects a single numeric value
 * @return the closest long to the argument, or {@code null} when the input
 *         is {@code null} or empty, its first field is {@code null}, or a
 *         {@link NumberFormatException} is raised during conversion
 * @throws IOException if any other exception occurs while processing the row
 */
@Override
public Long exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    if (input.get(0) == null) {
        return null;
    }

    try {
        Double value = DataType.toDouble(input.get(0));
        return Long.valueOf(Math.round(value.doubleValue()));
    } catch (NumberFormatException nfe) {
        // Unparseable input is reported on stderr and mapped to null.
        System.err.println("Failed to process input; error - " + nfe.getMessage());
        return null;
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
 
Example 7
Source File: DoubleNextup.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
   * Java-level API: returns {@link Math#nextUp(double)} of the first input field.
   *
   * @param input expects a single numeric value
   * @return the nextup value of the argument, or {@code null} when the input
   *         is {@code null} or empty, or its first field is {@code null}
   * @throws IOException if reading the first field or casting it to Double fails
   */
  public Double exec(Tuple input) throws IOException {
      if (input == null || input.size() == 0) {
          return null;
      }
      if (input.get(0) == null) {
          return null;
      }

      final Double operand;
      try {
          operand = (Double) input.get(0);
      } catch (Exception e) {
          throw new IOException("Caught exception processing input row ", e);
      }

      return Math.nextUp(operand);
  }
 
Example 8
Source File: DIFF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Compares a tuple with two fields. Emits any differences.
 * <p>
 * When the first field is a bag, both fields are treated as bags and the
 * differences are collected by {@code computeDiff}. Otherwise the two values
 * are compared directly and, if they differ, each one is emitted in its own
 * single-field tuple. The direct comparison is null-safe: two nulls are
 * equal, and a null differs from any non-null value.
 *
 * @param input a tuple with exactly two fields.
 * @return a bag holding the differing values; empty when the fields are equal
 * @throws IOException if there are not exactly two fields in a tuple
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input.size() != 2) {
        int errCode = 2107;
        String msg = "DIFF expected two inputs but received " + input.size() + " inputs.";
        throw new ExecException(msg, errCode, PigException.BUG);
    }

    DataBag output = mBagFactory.newDefaultBag();
    Object o1 = input.get(0);
    if (o1 instanceof DataBag) {
        DataBag bag1 = (DataBag) o1;
        DataBag bag2 = (DataBag) input.get(1);
        computeDiff(bag1, bag2, output);
    } else {
        Object d2 = input.get(1);
        // A null first field previously caused a NullPointerException on
        // equals(); compare null-safely instead.
        boolean same = (o1 == null) ? (d2 == null) : o1.equals(d2);
        if (!same) {
            output.add(mTupleFactory.newTuple(o1));
            output.add(mTupleFactory.newTuple(d2));
        }
    }
    return output;
}
 
Example 9
Source File: LTRIM.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Removes leading space characters from the first field of the input.
 *
 * @param input tuple whose first field is the string to trim
 * @return the string without leading spaces; the string itself when it is
 *         {@code null} or empty; {@code null} when the input is
 *         {@code null} or empty or the field cannot be read
 * @throws IOException declared by the UDF contract
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        final String text = (String) input.get(0);
        if (text == null || text.isEmpty()) {
            return text;
        }
        // Only ' ' characters at the very start are stripped.
        return text.replaceFirst("^ +", "");
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }
}
 
Example 10
Source File: FloatVAR.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tuples in the input bag whose first field is present and
 * non-null.
 *
 * @param input tuple whose first field is a {@link DataBag}
 * @return the number of non-null, non-empty tuples with a non-null first field
 * @throws ExecException if a field cannot be read
 */
static protected long count(Tuple input) throws ExecException {
    long total = 0;
    for (Tuple row : (DataBag) input.get(0)) {
        // Skip rows that are missing, empty, or hold a null value.
        if (row == null || row.size() <= 0 || row.get(0) == null) {
            continue;
        }
        total++;
    }
    return total;
}
 
Example 11
Source File: ExtractHour.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the characters at indices 6 and 7 of the timestamp string held
 * in the first field of the input.
 *
 * @param input tuple whose first field is the timestamp string
 * @return the two-character substring, or {@code null} when the input is
 *         {@code null} or empty or any exception occurs during extraction
 * @throws IOException declared by the UDF contract
 */
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    String hour = null;
    try {
        hour = ((String) input.get(0)).substring(6, 8);
    } catch (Exception e) {
        // Any failure (null field, short string, wrong type) is reported and mapped to null.
        System.err.println("ExtractHour: failed to proces input; error - " + e.getMessage());
    }
    return hour;
}
 
Example 12
Source File: TestUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Sums all fields of the input, each of which is cast to {@link Integer}.
 *
 * @param input tuple of integer fields
 * @return the sum of the fields, or 0 when the input is {@code null} or empty
 * @throws IOException if a field cannot be read
 */
@Override
public Integer exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return 0;
    }

    int sum = 0;
    int idx = 0;
    while (idx < input.size()) {
        sum += (Integer) input.get(idx);
        idx++;
    }
    return sum;
}
 
Example 13
Source File: WeightedRangePartitioner.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the fields of a tuple into a float array.
 *
 * @param values tuple whose every field is cast to {@link Float}
 * @return an array with one element per field, in field order
 * @throws ExecException propagated from the tuple accessors
 */
protected float[] getProbVec(Tuple values) throws ExecException {
    final int fieldCount = values.size();
    float[] probabilities = new float[fieldCount];
    int pos = 0;
    while (pos < fieldCount) {
        probabilities[pos] = (Float) values.get(pos);
        pos++;
    }
    return probabilities;
}
 
Example 14
Source File: UCFIRST.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Upper-cases the first character of the string in the first input field.
 *
 * @param input tuple whose first field is the string to convert
 * @return the string with its first character upper-cased; the string
 *         itself when it is {@code null} or empty; {@code null} when the
 *         input is {@code null} or empty or the field cannot be read
 * @throws IOException declared by the UDF contract
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        final String text = (String) input.get(0);
        if (text == null || text.isEmpty()) {
            return text;
        }
        final char head = Character.toUpperCase(text.charAt(0));
        return head + text.substring(1);
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }
}
 
Example 15
Source File: TestXMLLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that XMLLoader can read a bzip2-compressed input file: the
 * {@code data} fixture is written to a temp file, compressed with the
 * external {@code bzip2} command, and loaded using the {@code property}
 * tag, which must yield exactly two non-empty tuples.
 *
 * @throws Exception if the fixture cannot be created, compressed or loaded
 */
public void testXMLLoaderShouldLoadBasicBzip2Files() throws Exception {
 String filename = TestHelper.createTempFile(data, "");
 // Pass the path as its own argument: Runtime.exec(String) splits the
 // command line on whitespace, which breaks for paths containing spaces.
 Process bzipProc = new ProcessBuilder("bzip2", filename).start();
 int exitCode = bzipProc.waitFor();

 if (exitCode != 0) {
     fail("Failed to bzip2-compress " + filename + " (exit code " + exitCode + ")");
 }

 // From here on the test works with the compressed file.
 filename = filename + ".bz2";

 try {
     PigServer pigServer = new PigServer (ExecType.LOCAL);
     String loadQuery = "A = LOAD '" + Util.encodeEscape(filename) + "' USING org.apache.pig.piggybank.storage.XMLLoader('property') as (doc:chararray);";
     pigServer.registerQuery(loadQuery);

     Iterator<Tuple> it = pigServer.openIterator("A");
     int tupleCount = 0;
     while (it.hasNext()) {
         Tuple tuple = it.next();
         if (tuple == null) {
             break;
         }
         if (tuple.size() > 0) {
             tupleCount++;
         }
     }
     assertEquals(2, tupleCount);

 } finally {
     // Remove the compressed fixture whether or not the assertions passed.
     new File(filename).delete();
 }
}
 
Example 16
Source File: TRIM.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Trims leading and trailing whitespace from the first field of the input.
 *
 * @param input tuple whose first field is the string to trim
 * @return the trimmed string; the string itself when it is {@code null} or
 *         empty; {@code null} when the input is {@code null} or empty or
 *         the field cannot be read
 * @throws IOException declared by the UDF contract
 */
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        final String text = (String) input.get(0);
        if (text == null || text.isEmpty()) {
            return text;
        }
        return text.trim();
    } catch (ExecException e) {
        warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
        return null;
    }
}
 
Example 17
Source File: AllFirstLetter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates the first character of each value in the input bag and
 * stores the outcome in the {@code result} field.
 * <p>
 * Tuples that are {@code null}, empty, or whose first field is
 * {@code null} are skipped, as are values whose string form is empty
 * (these previously raised StringIndexOutOfBoundsException).
 *
 * @param input tuple whose first field is a {@link DataBag}
 * @return the concatenated first characters; empty when nothing qualifies
 * @throws IOException if a field cannot be read
 */
public String exec(Tuple input) throws IOException {
    result = "";
    DataBag bag = (DataBag) input.get(0);
    // StringBuilder avoids re-copying the accumulated string on every row.
    StringBuilder letters = new StringBuilder();
    Iterator<Tuple> it = bag.iterator();
    while (it.hasNext()) {
        Tuple t = it.next();
        if (t != null && t.size() > 0 && t.get(0) != null) {
            String text = t.get(0).toString();
            if (!text.isEmpty()) {
                letters.append(text.charAt(0));
            }
        }
    }
    result = letters.toString();
    return result;
}
 
Example 18
Source File: SubtractDuration.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Subtracts a duration from a date-time.
 *
 * @param input tuple holding a {@code DateTime} in field 0 and a period
 *              string in field 1
 * @return the date-time minus the period, or {@code null} when the input
 *         is {@code null}, has fewer than two fields, or either field is
 *         {@code null}
 * @throws IOException if a field cannot be read
 */
@Override
public DateTime exec(Tuple input) throws IOException {
    if (input == null || input.size() < 2) {
        return null;
    }

    Object start = input.get(0);
    if (start == null) {
        return null;
    }
    Object duration = input.get(1);
    if (duration == null) {
        return null;
    }

    return ((DateTime) start).minus(new Period((String) duration));
}
 
Example 19
Source File: PigQueryInterpreter.java    From zeppelin with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the paragraph text as a Pig script and renders the relation produced
 * by its last line as a {@code %table} result.
 * <p>
 * The last line of {@code st} is prefixed with an alias derived from the
 * paragraph id; the script is written to a temp file and registered with
 * {@code pigServer}, and up to {@code maxResult} rows of that alias are
 * emitted tab-separated, one per line. A header row is built from the schema
 * when it is known, otherwise from the width of the first tuple.
 *
 * @param st      the Pig statements of the paragraph, separated by newlines
 * @param context the interpreter context supplying the paragraph id
 * @return a {@code Code.SUCCESS} result holding the table text, or a
 *         {@code Code.ERROR} result describing the failure when an
 *         {@link IOException} is raised
 */
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
  // '-' is invalid for pig alias
  String alias = "paragraph_" + context.getParagraphId().replace("-", "_");
  String[] lines = st.split("\n");
  List<String> queries = new ArrayList<>();
  for (int i = 0; i < lines.length; ++i) {
    // Only the last line is bound to the alias; it is the relation that gets displayed.
    if (i == lines.length - 1) {
      lines[i] = alias + " = " + lines[i];
    }
    queries.add(lines[i]);
  }

  StringBuilder resultBuilder = new StringBuilder("%table ");
  try {
    pigServer.setJobName(createJobName(st, context));
    File tmpScriptFile = PigUtils.createTempPigScript(queries);
    // each thread should have its own ScriptState & PigStats
    ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
    // reset PigStats, otherwise you may get the PigStats of last job in the same thread
    // because PigStats is ThreadLocal variable
    PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
    PigScriptListener scriptListener = new PigScriptListener();
    ScriptState.get().registerListener(scriptListener);
    // Registered per paragraph id; removed again in the finally block below.
    listenerMap.put(context.getParagraphId(), scriptListener);
    pigServer.registerScript(tmpScriptFile.getAbsolutePath());
    Schema schema = pigServer.dumpSchema(alias);
    boolean schemaKnown = (schema != null);
    if (schemaKnown) {
      // Header row from the schema: field alias, or "col_<i>" when the field has none.
      for (int i = 0; i < schema.size(); ++i) {
        Schema.FieldSchema field = schema.getField(i);
        resultBuilder.append(field.alias != null ? field.alias : "col_" + i);
        if (i != schema.size() - 1) {
          resultBuilder.append("\t");
        }
      }
      resultBuilder.append("\n");
    }
    Iterator<Tuple> iter = pigServer.openIterator(alias);
    boolean firstRow = true;
    int index = 0;
    while (iter.hasNext() && index < maxResult) {
      index++;
      Tuple tuple = iter.next();
      if (firstRow && !schemaKnown) {
        // No schema: synthesize "c_<i>" column names from the first tuple's width.
        // Note that each name, including the last, is followed by a tab.
        for (int i = 0; i < tuple.size(); ++i) {
          resultBuilder.append("c_" + i + "\t");
        }
        resultBuilder.append("\n");
        firstRow = false;
      }
      resultBuilder.append(StringUtils.join(tuple.iterator(), "\t"));
      resultBuilder.append("\n");
    }
    // Rows remain beyond the limit: append the truncation notice.
    if (index >= maxResult && iter.hasNext()) {
      resultBuilder.append("\n");
      resultBuilder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, MAX_RESULTS));
    }
  } catch (IOException e) {
    // Extract error in the following order
    // 1. catch FrontendException, FrontendException happens in the query compilation phase.
    // 2. catch ParseException for syntax error
    // 3. PigStats, This is execution error
    // 4. Other errors.
    if (e instanceof FrontendException) {
      FrontendException fe = (FrontendException) e;
      // A FrontendException whose message contains "Backend error :" falls through
      // to the checks below instead of being reported here.
      if (!fe.getMessage().contains("Backend error :")) {
        LOGGER.error("Fail to run pig query.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
      }
    }
    if (e.getCause() instanceof ParseException) {
      return new InterpreterResult(Code.ERROR, e.getMessage());
    }
    PigStats stats = PigStats.get();
    if (stats != null) {
      String errorMsg = stats.getDisplayString();
      if (errorMsg != null) {
        return new InterpreterResult(Code.ERROR, errorMsg);
      }
    }
    LOGGER.error("Fail to run pig query.", e);
    return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
  } finally {
    // Always drop the listener registered for this paragraph, on success or failure.
    listenerMap.remove(context.getParagraphId());
  }
  return new InterpreterResult(Code.SUCCESS, resultBuilder.toString());
}
 
Example 20
Source File: BigIntegerAbs.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the absolute value of the {@link BigInteger} in the first input field.
 *
 * @param input tuple whose first field is a {@code BigInteger}
 * @return the absolute value, or {@code null} when the input is
 *         {@code null} or empty or its first field is {@code null}
 * @throws IOException if the field cannot be read
 */
@Override
public BigInteger exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    Object first = input.get(0);
    if (first == null) {
        return null;
    }
    return ((BigInteger) first).abs();
}