Java Code Examples for org.apache.pig.newplan.logical.relational.LogicalSchema#size()

The following examples show how to use org.apache.pig.newplan.logical.relational.LogicalSchema#size(). You can vote up the examples you like and vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ColumnPruneHelper.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a store operator with the uids it consumes.
 * <p>
 * The uids come from {@code setOutputUids(store)}; when that set is empty,
 * it is filled with the uid of every field of the store's schema.
 *
 * @param store the store operator being visited
 * @throws FrontendException if the fallback is needed and the store has no schema
 */
@Override
public void visit(LOStore store) throws FrontendException {
    Set<Long> uids = setOutputUids(store);

    // to deal with load-store-load-store case: nothing was collected,
    // so every column of the store's schema is taken instead
    if (uids.isEmpty()) {
        LogicalSchema storeSchema = store.getSchema();
        if (storeSchema == null) {
            throw new SchemaNotDefinedException("Schema for " + store.getName() + " is not defined.");
        }

        int idx = 0;
        while (idx < storeSchema.size()) {
            uids.add(storeSchema.getField(idx).uid);
            idx++;
        }
    }

    // a store passes its input straight through, so the same set is
    // recorded as its input uids
    store.annotate(INPUTUIDS, uids);
}
 
Example 2
Source File: AugmentBaseDataVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether {@code input} contains a tuple whose first
 * {@code schema.size()} fields are all equal to the corresponding fields of
 * {@code newTuple}.
 * <p>
 * Fields are compared null-safely: two {@code null} fields count as equal,
 * and a {@code null} field never equals a non-null one.
 *
 * @param newTuple the tuple to look for
 * @param input    the bag to search
 * @param schema   schema whose size gives the number of leading fields compared
 * @return {@code true} if a matching tuple is found, {@code false} otherwise
 * @throws ExecException if a field cannot be read from a tuple
 */
private boolean inInput(Tuple newTuple, DataBag input, LogicalSchema schema) throws ExecException {
    for (Iterator<Tuple> iter = input.iterator(); iter.hasNext();) {
        Tuple candidate = iter.next();
        boolean matches = true;
        for (int i = 0; i < schema.size(); ++i) {
            Object expected = newTuple.get(i);
            Object actual = candidate.get(i);
            // Calling expected.equals(...) directly would throw a
            // NullPointerException when the field is null.
            boolean same = (expected == null) ? (actual == null) : expected.equals(actual);
            if (!same) {
                matches = false;
                break;
            }
        }
        if (matches) {
            return true;
        }
    }
    return false;
}
 
Example 3
Source File: DereferenceExpression.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of raw column references into column positions.
 * <p>
 * An {@code Integer} entry is taken as a position; when a schema is available
 * it is range-checked against the schema. Any other entry is cast to
 * {@code String} and looked up as a field alias in the schema.
 *
 * @param schema     schema to resolve against; may be {@code null}, in which
 *                   case positions are accepted unchecked and aliases cannot
 *                   be resolved
 * @param rawColumns column references, each an {@code Integer} position or a
 *                   {@code String} alias
 * @return the resolved positions, in the order of {@code rawColumns}
 * @throws FrontendException with code 1127 if a position is out of range, or
 *                           with code 1128 if an alias cannot be resolved
 */
private List<Integer> translateAliasToPos(LogicalSchema schema, List<Object> rawColumns) throws FrontendException {
    List<Integer> columns = new ArrayList<Integer>();
    for (Object rawColumn : rawColumns) {
        if (rawColumn instanceof Integer) {
            Integer index = (Integer) rawColumn;
            if (schema != null && (index >= schema.size() || index < 0)) {
                throw new FrontendException("Index " + rawColumn + " out of range in schema:" + schema.toString(false), 1127);
            }
            columns.add(index);
        } else {
            // An alias can only be resolved against a schema. Without this
            // check a null schema would surface as a NullPointerException.
            if (schema == null) {
                throw new FrontendException("Cannot find field " + rawColumn + " because the schema is unknown", 1128);
            }
            int pos = schema.getFieldPosition((String) rawColumn);
            if (pos == -1) {
                throw new FrontendException("Cannot find field " + rawColumn + " in " + schema.toString(false), 1128);
            }
            columns.add(pos);
        }
    }
    return columns;
}
 
Example 4
Source File: ColumnPruneHelper.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a distinct operator with its input uids, which are the uids of
 * all fields of its schema.
 *
 * @param distinct the distinct operator being visited
 * @throws FrontendException if the operator has no schema
 */
@Override
public void visit(LODistinct distinct) throws FrontendException {
    setOutputUids(distinct);

    Set<Long> requiredUids = new HashSet<Long>();

    // distinct needs every field of its schema
    LogicalSchema distinctSchema = distinct.getSchema();
    if (distinctSchema == null) {
        throw new SchemaNotDefinedException("Schema for " + distinct.getName() + " is not defined.");
    }

    int idx = 0;
    while (idx < distinctSchema.size()) {
        requiredUids.add(distinctSchema.getField(idx).uid);
        idx++;
    }
    distinct.annotate(INPUTUIDS, requiredUids);
}
 
Example 5
Source File: ColumnPruneVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Inserts a foreach after {@code op} when some of its schema fields are not
 * listed in its {@code OUTPUTUIDS} annotation.
 * <p>
 * Does nothing when the annotation is absent or when every field's uid is in
 * the annotation.
 *
 * @param op the operator to inspect
 * @throws FrontendException propagated from schema access or plan manipulation
 */
@SuppressWarnings("unchecked")
private void addForEachIfNecessary(LogicalRelationalOperator op) throws FrontendException {
    Set<Long> keptUids = (Set<Long>) op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
    if (keptUids == null) {
        return;
    }

    LogicalSchema opSchema = op.getSchema();
    Set<Integer> droppable = new HashSet<Integer>();
    for (int col = 0; col < opSchema.size(); col++) {
        long uid = opSchema.getField(col).uid;
        if (!keptUids.contains(uid)) {
            droppable.add(col);
        }
    }

    if (droppable.isEmpty()) {
        return;
    }

    LOForEach inserted = Util.addForEachAfter((LogicalPlan) op.getPlan(), op, 0, droppable);
    // Return value intentionally discarded.
    // NOTE(review): presumably called so the new foreach computes its schema - confirm.
    inserted.getSchema();
}
 
Example 6
Source File: AddForEach.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Inserts a foreach after {@code op} that drops every column whose uid is
 * not in the operator's {@code OUTPUTUIDS} annotation. Nothing is inserted
 * when there is no such column.
 *
 * @param op the operator to inspect; this method does not null-check its
 *           annotation or its schema
 * @throws FrontendException propagated from schema access or plan manipulation
 */
@SuppressWarnings("unchecked")
private void addForeach(LogicalRelationalOperator op) throws FrontendException {
    Set<Long> keptUids = (Set<Long>) op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
    LogicalSchema opSchema = op.getSchema();

    Set<Integer> droppable = new HashSet<Integer>();
    int col = 0;
    while (col < opSchema.size()) {
        boolean kept = keptUids.contains(opSchema.getField(col).uid);
        if (!kept) {
            droppable.add(col);
        }
        col++;
    }

    if (droppable.isEmpty()) {
        return;
    }
    Util.addForEachAfter((LogicalPlan) op.getPlan(), op, 0, droppable);
}
 
Example 7
Source File: AddForEach.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether an LOForEach should be added after the logical operator.
 * <p>
 * Returns {@code false} when the operator is itself an LOForEach, has no
 * {@code OUTPUTUIDS} annotation, has no schema, or its first successor is
 * already an LOForEach. Otherwise returns {@code true} exactly when at least
 * one schema field has a uid that is not in the annotation.
 *
 * @param op the operator to inspect
 * @return {@code true} if a foreach should be added after {@code op}
 * @throws FrontendException propagated from schema access
 */
@SuppressWarnings("unchecked")
private boolean shouldAdd(LogicalRelationalOperator op) throws FrontendException {
    if (op instanceof LOForEach) {
        return false;
    }

    Set<Long> outputUids = (Set<Long>) op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
    if (outputUids == null) {
        return false;
    }

    LogicalSchema schema = op.getSchema();
    if (schema == null) {
        return false;
    }

    // check if there is already a foreach; the emptiness guard keeps
    // get(0) from throwing if the plan returns an empty successor list
    List<Operator> successors = op.getPlan().getSuccessors(op);
    if (successors != null && !successors.isEmpty() && successors.get(0) instanceof LOForEach) {
        return false;
    }

    // one droppable column is enough to decide, so stop at the first
    for (int i = 0; i < schema.size(); i++) {
        if (!outputUids.contains(schema.getField(i).uid)) {
            return true;
        }
    }
    return false;
}
 
Example 8
Source File: PartitionFilterOptimizer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code colNameMap} and {@code reverseColNameMap} from the two schemas
 * of {@code loLoad}.
 * <p>
 * For each position in the determined schema, {@code colNameMap} maps the
 * determined schema's alias to the alias at the same position in the load's
 * schema. When the load's schema has no field at that position, the
 * determined alias maps to itself. {@code reverseColNameMap} holds the
 * opposite direction.
 *
 * @throws FrontendException propagated from schema access
 */
protected void setupColNameMaps() throws FrontendException {
    LogicalSchema loadSchema = loLoad.getSchema();
    LogicalSchema determinedSchema = loLoad.getDeterminedSchema();

    for (int col = 0; col < determinedSchema.size(); col++) {
        String determinedAlias = determinedSchema.getField(col).alias;

        // fall back to the determined alias past the end of the load's schema
        String mappedAlias = determinedAlias;
        if (col < loadSchema.size()) {
            mappedAlias = loadSchema.getField(col).alias;
        }

        colNameMap.put(determinedAlias, mappedAlias);
        reverseColNameMap.put(mappedAlias, determinedAlias);
    }
}
 
Example 9
Source File: PigTypes.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the SQL row type that corresponds to a Pig tuple schema.
 *
 * <p>A field without an alias is named {@code "$"} followed by its position.
 * A null or empty schema yields a {@code DynamicTupleRecordType}.
 *
 * @param pigSchema Pig tuple schema
 * @param nullable true if the type is nullable
 * @return a SQL row type
 */
static RelDataType convertSchema(LogicalSchema pigSchema, boolean nullable) {
  if (pigSchema == null || pigSchema.size() <= 0) {
    return new DynamicTupleRecordType(TYPE_FACTORY);
  }

  final List<String> names = new ArrayList<>();
  final List<RelDataType> types = new ArrayList<>();
  for (int pos = 0; pos < pigSchema.size(); pos++) {
    final LogicalSchema.LogicalFieldSchema field = pigSchema.getField(pos);
    String name = field.alias;
    if (name == null) {
      name = "$" + pos;
    }
    names.add(name);
    types.add(convertSchemaField(field, nullable));
  }
  return TYPE_FACTORY.createStructType(types, names, nullable);
}
 
Example 10
Source File: TypeCastInserter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether at least one field of {@code s} needs a cast.
 * <p>
 * A field needs a cast when its type is not {@code BYTEARRAY} and one of the
 * following holds: there is no determined schema, the determined schema has
 * no field at that position, or the determined field is not equal to it.
 *
 * @param determinedSchema schema determined for the data; may be {@code null}
 *                         and may have fewer fields than {@code s}
 * @param s                the schema whose fields are checked
 * @return {@code true} if any field of {@code s} needs a cast
 */
private boolean atLeastOneCastNeeded(LogicalSchema determinedSchema, LogicalSchema s) {
    for (int i = 0; i < s.size(); i++) {
        LogicalSchema.LogicalFieldSchema fs = s.getField(i);
        if (fs.type == DataType.BYTEARRAY) {
            // already the default type, nothing to cast
            continue;
        }
        // The two schemas may differ in size. With no determined field at
        // this position the typed field cannot already match, so a cast is
        // needed. Without the bounds check, getField(i) is called past the
        // end of the determined schema.
        if (determinedSchema == null || i >= determinedSchema.size()) {
            return true;
        }
        if (!fs.isEqual(determinedSchema.getField(i))) {
            // we have to cast this field from the default BYTEARRAY type to
            // whatever the user specified in the 'AS' clause of the LOAD
            // statement (the fs.type).
            return true;
        }
    }
    return false;
}
 
Example 11
Source File: ColumnPruneHelper.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the output uids of {@code op}, stores them on the operator as its
 * {@code OUTPUTUIDS} annotation and returns them.
 * <p>
 * When the plan reports no successors for the operator, the result is the uid
 * of every field of its schema. Otherwise it is the union of the successors'
 * {@code INPUTUIDS} annotations, restricted to uids found in the operator's
 * schema.
 *
 * @param op the operator to annotate
 * @return the set stored as the annotation
 * @throws FrontendException if the operator has no schema
 */
@SuppressWarnings("unchecked")
private Set<Long> setOutputUids(LogicalRelationalOperator op) throws FrontendException {

    List<Operator> successors = plan.getSuccessors(op);
    Set<Long> collected = new HashSet<Long>();

    LogicalSchema opSchema = op.getSchema();
    if (opSchema == null) {
        throw new SchemaNotDefinedException("Schema for " + op.getName() + " is not defined.");
    }

    if (successors == null) {
        // leaf operator: take every field of its own schema
        for (int col = 0; col < opSchema.size(); col++) {
            collected.add(opSchema.getField(col).uid);
        }
    } else {
        // not a sink: union of what the successors ask for, limited to
        // uids this operator's schema actually contains
        for (Operator successor : successors) {
            Set<Long> wanted = (Set<Long>) successor.getAnnotation(INPUTUIDS);
            if (wanted == null) {
                continue;
            }
            for (long uid : wanted) {
                if (opSchema.findField(uid) != -1) {
                    collected.add(uid);
                }
            }
        }
    }

    op.annotate(OUTPUTUIDS, collected);
    return collected;
}
 
Example 12
Source File: FilterAboveForeach.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the uids and types of the fields projected by a filter's
 * expression plan.
 * <p>
 * Each {@code ProjectExpression} in the filter plan contributes the uid and
 * type of its field schema. A project-star instead contributes the uid and
 * type of every field of the schema of the filter's first predecessor, or
 * nothing when that schema is {@code null}.
 *
 * @param filter the filter to inspect; {@code null} yields two empty lists
 * @return a pair of parallel lists: uids first, types second
 * @throws FrontendException propagated from schema access
 */
private Pair<List<Long>, List<Byte>> getFilterProjectionUids(LOFilter filter) throws FrontendException {
    List<Long> uids = new ArrayList<Long>();
    List<Byte> types = new ArrayList<Byte>();
    if (filter == null) {
        return new Pair<List<Long>, List<Byte>>(uids, types);
    }

    Iterator<Operator> operators = filter.getFilterPlan().getOperators();
    while (operators.hasNext()) {
        Operator candidate = operators.next();
        if (!(candidate instanceof ProjectExpression)) {
            continue;
        }

        ProjectExpression projection = (ProjectExpression) candidate;
        if (!projection.isProjectStar()) {
            uids.add(projection.getFieldSchema().uid);
            types.add(projection.getFieldSchema().type);
            continue;
        }

        // project-star: take every field of the first predecessor's schema
        LogicalRelationalOperator pred =
                (LogicalRelationalOperator) filter.getPlan().getPredecessors(filter).get(0);
        LogicalSchema predSchema = pred.getSchema();
        if (predSchema == null) {
            continue;
        }
        for (int col = 0; col < predSchema.size(); col++) {
            uids.add(predSchema.getField(col).uid);
            types.add(predSchema.getField(col).type);
        }
    }

    return new Pair<List<Long>, List<Byte>>(uids, types);
}
 
Example 13
Source File: PredicatePushdownOptimizer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Populates {@code colNameMap} and {@code reverseColNameMap}.
 * <p>
 * Walks the determined schema of {@code loLoad} by position. Each determined
 * alias is paired with the alias at the same position in the load's schema,
 * or with itself when the load's schema has fewer fields. The pair is stored
 * as determined-to-paired in {@code colNameMap} and as paired-to-determined
 * in {@code reverseColNameMap}.
 *
 * @throws FrontendException propagated from schema access
 */
protected void setupColNameMaps() throws FrontendException {
    LogicalSchema loadSchema = loLoad.getSchema();
    LogicalSchema funcSchema = loLoad.getDeterminedSchema();

    int pos = 0;
    while (pos < funcSchema.size()) {
        String funcAlias = funcSchema.getField(pos).alias;
        boolean hasLoadField = pos < loadSchema.size();
        String pairedAlias = hasLoadField ? loadSchema.getField(pos).alias : funcAlias;

        colNameMap.put(funcAlias, pairedAlias);
        reverseColNameMap.put(pairedAlias, funcAlias);
        pos++;
    }
}
 
Example 14
Source File: LineageFindRelVisitor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the single load func spec associated with a relation.
 * <p>
 * When the relation has a schema, the func spec mapped to the first field's
 * uid is used, provided every other field's mapped func spec has identical
 * casters. A schema with no fields yields {@code null}. When no func spec is
 * found this way, the relation-level mapping is consulted. The exception is
 * an LOForEach whose inner plan contains a UDF, which yields {@code null}.
 *
 * @param relOp the relation to look up
 * @return the associated func spec, or {@code null} if there is none
 * @throws FrontendException propagated from schema access or the UDF search
 */
private FuncSpec getAssociatedLoadFunc(LogicalRelationalOperator relOp) throws FrontendException {
    FuncSpec shared = null;

    LogicalSchema relSchema = relOp.getSchema();
    if (relSchema != null) {
        if (relSchema.size() == 0) {
            return null;
        }
        shared = uid2LoadFuncMap.get(relSchema.getField(0).uid);

        // Compare every other field against the first one. The loop ends
        // as soon as a field disagrees, since then no single func spec
        // represents all the fields.
        int idx = 1;
        while (shared != null && idx < relSchema.size()) {
            LogicalFieldSchema field = relSchema.getField(idx);
            if (!haveIdenticalCasters(shared, uid2LoadFuncMap.get(field.uid))) {
                shared = null;
            }
            idx++;
        }
    }

    if (shared != null) {
        return shared;
    }

    // If relOp is LOForEach and contains UDF, byte field could come from UDF.
    // We don't assume it shares the LoadCaster with its predecessor.
    if (relOp instanceof LOForEach) {
        UDFFinder udfFinder = new UDFFinder(((LOForEach) relOp).getInnerPlan());
        udfFinder.visit();
        if (udfFinder.getUDFList().size() != 0) {
            return null;
        }
    }

    return rel2InputFuncMap.get(relOp);
}
 
Example 15
Source File: TypeCastInserter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether the first source of the matched plan needs processing.
 * <p>
 * Returns {@code false} when the operator has no schema, has already been
 * cast-adjusted, or has no script schema. Otherwise returns {@code true} when
 * at least one cast is needed, when no schema could be determined, or when
 * the determined schema and the operator's schema differ in size.
 *
 * @param matched the matched plan; its first source is cast to LOLoad or,
 *                failing that, to LOStream
 * @return {@code true} if the operator should be processed
 * @throws FrontendException propagated from schema access
 */
@Override
public boolean check(OperatorPlan matched) throws FrontendException {
    LogicalRelationalOperator source = (LogicalRelationalOperator) matched.getSources().get(0);
    LogicalSchema declared = source.getSchema();
    if (declared == null) {
        return false;
    }

    // only process each node once
    if (isCastAdjusted(source)) {
        return false;
    }

    boolean hasScriptSchema = (source instanceof LOLoad)
            ? ((LOLoad) source).getScriptSchema() != null
            : ((LOStream) source).getScriptSchema() != null;
    if (!hasScriptSchema) {
        return false;
    }

    // The operation *can* have casts added at this point (the user specified
    // types that might not match the data). A cast is actually required
    // either because a field needs one, or because the field count is unknown
    // or different. In the latter case a projection (or pruning) makes the
    // columns show up (with NULL values) or truncates them from the right
    // hand side of the input data.
    LogicalSchema determined = determineSchema(source);
    return atLeastOneCastNeeded(determined, declared)
            || determined == null
            || determined.size() != declared.size();
}
 
Example 16
Source File: ProjectExpression.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the column number this projection refers to in its input.
 * <p>
 * With an alias, the position of that alias in the input schema is returned.
 * Without one, the projection's own column number is returned after checking
 * it against the input schema's size, when a schema is available.
 *
 * @param alias the field alias to resolve, or {@code null} to use the
 *              projection's column number
 * @return the resolved column number
 * @throws FrontendException a {@code PlanValidationException} with code 1025
 *                           if the alias is not found, or with code 1000 if
 *                           the column number is past the end of the schema
 */
private int findColNum(String alias) throws FrontendException {
    LogicalPlan lp = (LogicalPlan) attachedRelationalOp.getPlan();
    List<Operator> inputs = lp.getPredecessors(attachedRelationalOp);
    LogicalRelationalOperator input = (LogicalRelationalOperator) inputs.get(getInputNum());
    LogicalSchema inputSchema = input.getSchema();

    if (alias == null) {
        int col = getColNum();
        if (inputSchema != null && col >= inputSchema.size()) {
            throw new PlanValidationException(this,
                    "Out of bound access. Trying to access non-existent column: " +
                    col + ". Schema " + inputSchema.toString(false) +
                    " has " + inputSchema.size() + " column(s).", 1000);
        }
        return col;
    }

    int colNum = -1;
    if (inputSchema != null) {
        colNum = inputSchema.getFieldPosition(alias);
    }
    if (colNum != -1) {
        return colNum;
    }

    StringBuilder msg = new StringBuilder();
    msg.append("Invalid field projection. Projected field [").append(alias).append("] does not exist");
    if (inputSchema != null) {
        msg.append(" in schema: ").append(inputSchema.toString(false));
    }
    msg.append(".");
    throw new PlanValidationException(this, msg.toString(), 1025);
}
 
Example 17
Source File: ColumnPruneHelper.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Decides whether any load in the sub-plan can have columns pruned.
 * <p>
 * Runs a {@code ColumnDependencyVisitor} over {@code currentPlan}, then for
 * every LOLoad source of the sub-plan compares the columns derived from its
 * {@code INPUTUIDS} annotation with the size of its schema. A load that needs
 * fewer columns than its schema has is annotated with {@code REQUIREDCOLS}.
 * <p>
 * Side effects: assigns {@code subPlan}, and calls {@code clearAnnotation()}
 * on every path that returns {@code false}.
 *
 * @return {@code true} if at least one load was annotated with
 *         {@code REQUIREDCOLS}
 * @throws FrontendException propagated from plan or schema access; a
 *                           {@code SchemaNotDefinedException} raised by the
 *                           visitor is caught and turned into {@code false}
 */
@SuppressWarnings("unchecked")
public boolean check() throws FrontendException {
    List<Operator> sources = currentPlan.getSources();
    // if this rule has run before, just return false
    // NOTE(review): this guard only fires when the plan has more than one
    // source; a single-source plan skips it. Confirm that "> 1" (rather
    // than "> 0") is intended.
    if (sources.size() > 1 && sources.get(0).getAnnotation(INPUTUIDS) != null) {
        clearAnnotation();
        return false;
    }

    // create sub-plan that ends with foreach
    subPlan = getSubPlan();
    if (subPlan.size() == 0) {
        // empty sub-plan: nothing to prune
        clearAnnotation();
        return false;
    }

    // the visitor is built on currentPlan, not on subPlan
    ColumnDependencyVisitor v = new ColumnDependencyVisitor(currentPlan);
    try {
        v.visit();
    }catch(SchemaNotDefinedException e) {
        // if any operator has an unknown schema, just return false
        clearAnnotation();
        return false;
    }

    List<Operator> ll = subPlan.getSources();
    boolean found = false;
    for(Operator op: ll) {
        if (op instanceof LOLoad) {
            Set<Long> uids = (Set<Long>)op.getAnnotation(INPUTUIDS);
            LogicalSchema s = ((LOLoad) op).getSchema();
            Set<Integer> required = getColumns(s, uids);

            // fewer required columns than schema fields means this load
            // can be pruned
            if (required.size() < s.size()) {
                op.annotate(REQUIREDCOLS, required);
                found = true;
            }
        }
    }

    // no load can be pruned, so drop the annotations again
    if (!found)
        clearAnnotation();

    return found;
}
 
Example 18
Source File: ProjectStarExpanderUtil.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Finds the first and last input columns that a project-star or
 * project-range should project.
 * <p>
 * A range project uses its own start column, and its own end column when
 * that is non-negative; otherwise the last column of the input schema is
 * used. Any other project spans the whole input schema.
 *
 * @param expPlan the expression plan (not read by this method)
 * @param proj the projection to expand
 * @return pair holding the first and last column to project, or
 *         {@code null} if the input schema is null and the projection is a
 *         project-star or a project-range with end column -1
 * @throws FrontendException propagated from plan or schema access
 */
static Pair<Integer, Integer> getProjectStartEndCols(
        LogicalExpressionPlan expPlan, ProjectExpression proj)
        throws FrontendException {

    // the relation feeding this project is the predecessor of the attached
    // relation at the project's input number
    LogicalRelationalOperator attached = proj.getAttachedRelationalOp();
    List<Operator> attachedInputs = attached.getPlan().getPredecessors(attached);
    LogicalRelationalOperator source =
        (LogicalRelationalOperator) attachedInputs.get(proj.getInputNum());
    LogicalSchema sourceSchema = source.getSchema();

    if (sourceSchema == null) {
        // can't expand a project-star or project-range-until-end without
        // knowing the input schema
        boolean needsSchema =
            proj.isProjectStar() || (proj.isRangeProject() && proj.getEndCol() == -1);
        if (needsSchema) {
            return null;
        }
    }

    if (!proj.isRangeProject()) {
        // project-star
        return new Pair<Integer, Integer>(0, sourceSchema.size() - 1);
    }

    // the range values are set in the project in LOInnerLoad
    proj.setColumnNumberFromAlias();
    int first = proj.getStartCol();
    int last = proj.getEndCol() >= 0 ? proj.getEndCol() : sourceSchema.size() - 1;
    return new Pair<Integer, Integer>(first, last);
}
 
Example 19
Source File: Util.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Adds a new LOForEach to {@code plan} directly after {@code op}, projecting
 * every column of the operator's schema except those in {@code columnsToDrop}.
 * <p>
 * When {@code op} has successors, the foreach is inserted between {@code op}
 * and the successor at index {@code branch} and takes the alias of {@code op}.
 * Otherwise it is simply connected after {@code op}.
 *
 * @param plan          the plan to modify
 * @param op            the operator after which the foreach is placed; its
 *                      schema is read without a null check
 * @param branch        index into the successors of {@code op}; only used
 *                      when successors exist
 * @param columnsToDrop positions in the schema of {@code op} to leave out
 * @return the newly created foreach
 * @throws FrontendException propagated from plan or schema access
 */
public static LOForEach addForEachAfter(LogicalPlan plan, LogicalRelationalOperator op, int branch,
        Set<Integer> columnsToDrop) throws FrontendException {
    LOForEach foreach = new LOForEach(plan);

    plan.add(foreach);
    List<Operator> next = plan.getSuccessors(op);
    if (next != null) {
        // splice the foreach into the existing edge op -> nextOp
        LogicalRelationalOperator nextOp = (LogicalRelationalOperator)next.get(branch);
        plan.insertBetween(op, foreach, nextOp);
        foreach.setAlias(op.getAlias());
    }
    else {
        // op has no successors: just hang the foreach after it
        plan.connect(op, foreach);
    }

    LogicalPlan innerPlan = new LogicalPlan();
    foreach.setInnerPlan(innerPlan);

    LogicalSchema schema = op.getSchema();

    // build foreach inner plan
    List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();
    // one flag per kept column, all left at false
    // NOTE(review): the array length assumes every entry of columnsToDrop is
    // a valid position in the schema - confirm callers guarantee that.
    LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[schema.size()-columnsToDrop.size()]);
    innerPlan.add(gen);

    // i walks the positions of op's schema; j counts only the kept columns
    for (int i=0, j=0; i<schema.size(); i++) {
        if (columnsToDrop.contains(i)) {
            continue;
        }

        // the inner load is created with the original position i
        LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, i);
        innerPlan.add(innerLoad);
        innerPlan.connect(innerLoad, gen);

        // the projection is created with the kept-column counter j
        LogicalExpressionPlan exp = new LogicalExpressionPlan();
        ProjectExpression prj = new ProjectExpression(exp, j++, -1, gen);
        exp.add(prj);
        exps.add(exp);
    }
    return foreach;
}