Java Code Examples for org.apache.pig.data.DataType#isSchemaType()

The following examples show how to use org.apache.pig.data.DataType#isSchemaType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: LogicalSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Compares two logical field schemas for equality.
 *
 * <p>A {@code null} argument never compares equal, not even to another
 * {@code null}. The types must match; aliases and inner schemas are compared
 * unless the corresponding relax flag is set.
 *
 * @param fschema first field schema to compare
 * @param fother second field schema to compare
 * @param relaxInner if true, inner schemas of schema types are not compared
 * @param relaxAlias if true, aliases are not compared
 * @return true if the field schemas are equal, false otherwise
 */
public static boolean equals(LogicalFieldSchema fschema,
                             LogicalFieldSchema fother,
                             boolean relaxInner,
                             boolean relaxAlias) {
    if (fschema == null || fother == null || fschema.type != fother.type) {
        return false;
    }

    if (!relaxAlias) {
        // Two missing aliases match; a missing alias never matches a present one.
        final boolean sameAlias = (fschema.alias == null)
                ? fother.alias == null
                : fschema.alias.equals(fother.alias);
        if (!sameAlias) {
            return false;
        }
    }

    if (relaxInner || !DataType.isSchemaType(fschema.type)) {
        return true;
    }

    // Two absent inner schemas are deliberately viewed as equal, so the
    // schema-level comparison is skipped for them.
    if (fschema.schema == null && fother.schema == null) {
        return true;
    }

    // Inner schemas are always compared strictly; only the alias relaxation
    // is propagated.
    return LogicalSchema.equals(fschema.schema, fother.schema, false, relaxAlias);
}
 
Example 2
Source File: ResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a resource field schema from a
 * {@link org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema},
 * copying its type and alias and converting its inner schema when one is present.
 *
 * @param fieldSchema logical field schema used as the template
 */
public ResourceFieldSchema(LogicalFieldSchema fieldSchema) {
    this.type = fieldSchema.type;
    this.name = fieldSchema.alias;
    this.description = "autogenerated from Pig Field Schema";

    // Partial schemas are allowed: a schema type may come without an inner
    // schema, in which case (as for non-schema types) no nested schema is kept.
    final LogicalSchema nested = fieldSchema.schema;
    final boolean keepNested = DataType.isSchemaType(this.type) && nested != null;
    this.schema = keepNested ? new ResourceSchema(nested) : null;
}
 
Example 3
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Constructor for tuple fields.
 * 
 * @param a
 *            Alias, if known. If unknown leave null.
 * @param s
 *            Schema of this tuple.
 * @param t
 *            Type, using codes from
 *            {@link org.apache.pig.data.DataType}.
 * 
 */
public FieldSchema(String a, Schema s, byte t)  throws FrontendException {
    alias = a;
    schema = s;
    log.debug("t: " + t + " Bag: " + DataType.BAG + " tuple: " + DataType.TUPLE);
    
    if ((null != s) && !(DataType.isSchemaType(t))) {
        int errCode = 1020;
        throw new FrontendException("Only a BAG, TUPLE or MAP can have schemas. Got "
                + DataType.findTypeName(t), errCode, PigException.INPUT);
    }
    
    type = t;
    canonicalName = CanonicalNamer.getNewName();
}
 
Example 4
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively replaces the NULL type with the specified type.
 *
 * <p>A {@code null} field schema is ignored. When the field's type (after any
 * replacement) is a schema type, its inner schema is handed to
 * {@code setSchemaDefaultType} with the same default.
 *
 * @param fs the field schema whose NULL type has to be set
 * @param t the type to use in place of NULL
 */
public static void setFieldSchemaDefaultType(Schema.FieldSchema fs, byte t) {
    if (fs != null) {
        if (fs.type == DataType.NULL) {
            fs.type = t;
        }
        // Checked against the possibly just-updated type.
        if (DataType.isSchemaType(fs.type)) {
            setSchemaDefaultType(fs.schema, t);
        }
    }
}
 
Example 5
Source File: POCast.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the display name of this cast: {@code Cast[<type>] - <key>}, with
 * {@code :<cast string>} appended to the type when the result type is a
 * schema type.
 */
@Override
public String name() {
    final boolean schemaResult = DataType.isSchemaType(resultType);
    String label = "Cast" + "[" + DataType.findTypeName(resultType);
    if (schemaResult) {
        // Schema types additionally show the cast string of the field schema.
        label += ":" + fieldSchema.calcCastString();
    }
    return label + "]" + " - " + mKey.toString();
}
 
Example 6
Source File: LogicalSchema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Check if FieldSchema inFs is castable to outFs.
 *
 * <p>Rules implemented here:
 * <ul>
 *   <li>If either argument is {@code null} the result is {@code false}.</li>
 *   <li>When the output type is a schema type, the input must be a BYTEARRAY
 *       or have the same type; for the same type, the inner schemas are
 *       compared recursively unless the input inner schema is null or empty.</li>
 *   <li>Otherwise: identical types and BYTEARRAY input are always castable;
 *       BOOLEAN casts to CHARARRAY, BYTEARRAY and number types; number types
 *       cast to CHARARRAY, BYTEARRAY, number types and BOOLEAN; CHARARRAY
 *       casts to BYTEARRAY, number types and BOOLEAN.</li>
 * </ul>
 *
 * @param inFs field schema of the value being cast
 * @param outFs field schema of the cast target
 * @return true if it is castable
 */
public static boolean castable(LogicalFieldSchema inFs,
        LogicalFieldSchema outFs) {

    if (inFs == null || outFs == null) {
        return false;
    }

    byte inType = inFs.type;
    byte outType = outFs.type;

    if (DataType.isSchemaType(outType)) {
        if (inType == DataType.BYTEARRAY) {
            return true;
        }
        if (inType != outType) {
            return false;
        }
        // Skip the recursive comparison when the input inner schema is null
        // or empty: such an input is viewed as castable to any inner schema.
        if (inFs.schema == null || inFs.schema.size() == 0) {
            return true;
        }
        return LogicalSchema.castable(inFs.schema, outFs.schema);
    }

    if (inType == outType) {
        return true;
    }
    else if (inType == DataType.BOOLEAN && (outType == DataType.CHARARRAY
            || outType == DataType.BYTEARRAY || DataType.isNumberType(outType))) {
        return true;
    }
    // The BOOLEAN target must be part of the parenthesised target list.
    // Left outside, "&&" binds tighter than "||" and every input type
    // (including schema types) would be reported as castable to BOOLEAN.
    else if (DataType.isNumberType(inType) && (outType == DataType.CHARARRAY
            || outType == DataType.BYTEARRAY || DataType.isNumberType(outType)
            || outType == DataType.BOOLEAN)) {
        return true;
    }
    else if (inType == DataType.CHARARRAY && (outType == DataType.BYTEARRAY
            || DataType.isNumberType(outType) || outType == DataType.BOOLEAN)) {
        return true;
    }
    else if (inType == DataType.BYTEARRAY) {
        return true;
    }

    return false;
}
 
Example 7
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 4 votes vote down vote up
/***************************************************************************
 * Compare two schemas for equality for argument matching purposes. This is
 * a more relaxed form of Schema.equals wherein first the Datatypes of the
 * field schema are checked for equality. Then if a field schema in the udf
 * schema is for a complex type AND if the inner schema is NOT null, check
 * for schema equality of the inner schemas of the UDF field schema and
 * input field schema
 *
 * @param inputSchema schema of the actual arguments
 * @param udfSchema schema declared by the UDF
 * @param udfSchemaType NORMAL requires the same number of fields in both
 *            schemas; VARARG requires the input to have at least as many
 *            fields as the udf schema, the last udf field being reused for
 *            any extra input fields
 * @param ignoreByteArrays if true, input fields of type bytearray are
 *            skipped instead of being compared
 * @return true if FieldSchemas are equal for argument matching, false
 *         otherwise (including when the input or the udf schema contains a
 *         null field that has to be compared)
 * @throws FrontendException
 */
public static boolean schemaEqualsForMatching(Schema inputSchema,
        Schema udfSchema, SchemaType udfSchemaType, boolean ignoreByteArrays) throws FrontendException {

    // If both of them are null, they are equal; if only one is, they are not
    if (inputSchema == null || udfSchema == null) {
        return inputSchema == null && udfSchema == null;
    }

    // the old udf schemas might not have tuple inside bag
    // fix that!
    udfSchema = Util.fixSchemaAddTupleInBag(udfSchema);

    if ((udfSchemaType == SchemaType.NORMAL) && (inputSchema.size() != udfSchema.size())) {
        return false;
    }
    if ((udfSchemaType == SchemaType.VARARG) && inputSchema.size() < udfSchema.size()) {
        return false;
    }

    Iterator<FieldSchema> i = inputSchema.getFields().iterator();
    Iterator<FieldSchema> j = udfSchema.getFields().iterator();

    FieldSchema udfFieldSchema = null;
    while (i.hasNext()) {

        FieldSchema inputFieldSchema = i.next();
        if (inputFieldSchema == null) {
            return false;
        }

        //if there's no more UDF field: take the last one which is the vararg field
        udfFieldSchema = j.hasNext() ? j.next() : udfFieldSchema;

        if (ignoreByteArrays && inputFieldSchema.type == DataType.BYTEARRAY) {
            continue;
        }

        // There may be no udf field to compare against (the udf schema has no
        // fields at all, or it holds a null entry). Treat that as a mismatch,
        // as is done for null input fields, rather than dereferencing null.
        if (udfFieldSchema == null) {
            return false;
        }

        if (inputFieldSchema.type != udfFieldSchema.type) {
            return false;
        }

        // if a field schema in the udf schema is for a complex
        // type AND if the inner schema is NOT null, check for schema
        // equality of the inner schemas of the UDF field schema and
        // input field schema. If the field schema in the udf schema is
        // for a complex type AND if the inner schema IS null it means
        // the udf is applicable for all input which has the same type
        // for that field (irrespective of inner schema)
        // if it is a bag with empty tuple, then just rely on the field type
        if (DataType.isSchemaType(udfFieldSchema.type)
                && udfFieldSchema.schema != null
                && isNotBagWithEmptyTuple(udfFieldSchema)
        ) {
            // Compare recursively using field schema
            if (!FieldSchema.equals(inputFieldSchema, udfFieldSchema,
                    false, true)) {
                //try modifying any empty tuple to type of bytearray
                // and see if that matches. Need to do this for
                // backward compatibility -
                // User might have specified tuple with a bytearray
                // and this should also match an empty tuple

                FieldSchema inputFSWithBytearrayinTuple =
                    new FieldSchema(inputFieldSchema);

                convertEmptyTupleToBytearrayTuple(inputFSWithBytearrayinTuple);

                if (!FieldSchema.equals(inputFSWithBytearrayinTuple, udfFieldSchema,
                        false, true)) {
                    return false;
                }
            }
        }

    }
    return true;
}
 
Example 8
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Computes a modified version of the manhattan distance between the two
 * schemas s1 and s2. Values on the same axis are preferred over values that
 * change axis, as staying on one axis means fewer casts are required.
 *
 * <p>Because of that modification the function is no longer a metric: the
 * triangle inequality does not hold.
 *
 * <p>Each schema is an s1.size() dimensional vector. The ordering for each
 * axis is as defined by castLookup. Unallowed casts yield a distance of INF.
 *
 * @param s1 schema being fitted
 * @param s2 schema to fit against
 * @param s2Type NORMAL requires both schemas to have the same number of
 *            fields; VARARG lets the last field of s2 be reused for any
 *            extra fields of s1
 * @return the sum of the per-field cast distances multiplied by the number
 *         of casts, or INF when the schemas cannot be fitted
 */
private long fitPossible(Schema s1, Schema s2, SchemaType s2Type) {
    if (s1 == null || s2 == null) {
        return INF;
    }

    final List<FieldSchema> inputFields = s1.getFields();
    final List<FieldSchema> targetFields = s2.getFields();
    final int inputCount = inputFields.size();

    if (s2Type == SchemaType.NORMAL && inputCount != targetFields.size()) {
        return INF;
    }
    if (s2Type == SchemaType.VARARG && inputCount < targetFields.size()) {
        return INF;
    }

    long distance = 0;
    int casts = 0;
    for (int idx = 0; idx < inputCount; idx++) {
        final FieldSchema inputField = inputFields.get(idx);
        if (inputField == null) {
            return INF;
        }

        // Byte arrays are left out of the score; they are looked at
        // separately outside of this function.
        if (inputField.type == DataType.BYTEARRAY) {
            continue;
        }

        // Once past the declared fields of a vararg schema, keep matching
        // against its last field.
        final FieldSchema targetField;
        if (s2Type == SchemaType.VARARG && idx >= s2.size()) {
            targetField = targetFields.get(s2.size() - 1);
        } else {
            targetField = targetFields.get(idx);
        }

        // Schema types must match exactly, inner schema included.
        if (DataType.isSchemaType(inputField.type)
                && !FieldSchema.equals(inputField, targetField, false, true)) {
            return INF;
        }

        // Same type (inner schema and alias ignored): no cast needed.
        if (FieldSchema.equals(inputField, targetField, true, true)) {
            continue;
        }

        if (!castLookup.containsKey(inputField.type)) {
            return INF;
        }
        if (!castLookup.get(inputField.type).contains(targetField.type)) {
            return INF;
        }

        distance += castLookup.get(inputField.type).indexOf(targetField.type) + 1;
        casts++;
    }
    return distance * casts;
}
 
Example 9
Source File: Schema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively compares two field schemas to check whether the input schema
 * can be cast to the cast schema.
 *
 * <p>A {@code null} argument is never castable. For a schema-typed cast
 * target the input must be a BYTEARRAY or have the same type, in which case
 * the inner schemas are compared recursively unless both are null. For any
 * other target, identical types and BYTEARRAY input are always accepted, and
 * the remaining combinations are those listed in the body.
 *
 * @param castFs schema of the cast operator
 * @param inputFs schema of the cast input
 * @return true if the input can be cast, false otherwise
 */
public static boolean castable(
        Schema.FieldSchema castFs,
        Schema.FieldSchema inputFs) {
    if (castFs == null || inputFs == null) {
        return false;
    }

    final byte inputType = inputFs.type;
    final byte castType = castFs.type;

    if (DataType.isSchemaType(castType)) {
        if (inputType == DataType.BYTEARRAY) {
            return true;
        }
        if (inputType != castType) {
            return false;
        }
        // Two absent inner schemas are viewed as castable, so the recursive
        // comparison is skipped for them.
        if (castFs.schema == null && inputFs.schema == null) {
            return true;
        }
        return Schema.castable(castFs.schema, inputFs.schema);
    }

    if (inputType == castType || inputType == DataType.BYTEARRAY) {
        return true;
    }

    final boolean toChararray = castType == DataType.CHARARRAY;
    final boolean toBytearray = castType == DataType.BYTEARRAY;
    final boolean toBoolean = castType == DataType.BOOLEAN;
    final boolean toDatetime = castType == DataType.DATETIME;
    final boolean toNumber = DataType.isNumberType(castType);

    final boolean fromBoolean = inputType == DataType.BOOLEAN
            && (toChararray || toBytearray || toNumber);
    final boolean fromNumber = DataType.isNumberType(inputType)
            && (toChararray || toBytearray || toNumber || toBoolean || toDatetime);
    final boolean fromDatetime = inputType == DataType.DATETIME
            && (toChararray || toBytearray || toNumber);
    final boolean fromChararray = inputType == DataType.CHARARRAY
            && (toBytearray || toNumber || toBoolean || toDatetime);

    return fromBoolean || fromNumber || fromDatetime || fromChararray;
}
 
Example 10
Source File: Schema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Compares two field schemas for equality.
 *
 * <p>A {@code null} argument never compares equal, not even to another
 * {@code null}. The types must match; aliases and inner schemas are compared
 * unless the corresponding relax flag is set.
 *
 * @param fschema first field schema to compare
 * @param fother second field schema to compare
 * @param relaxInner If true, we don't check inner tuple schemas
 * @param relaxAlias If true, we don't check aliases
 * @return true if FieldSchemas are equal, false otherwise
 */
public static boolean equals(FieldSchema fschema,
                             FieldSchema fother,
                             boolean relaxInner,
                             boolean relaxAlias) {
    if (fschema == null || fother == null || fschema.type != fother.type) {
        return false;
    }

    if (!relaxAlias) {
        // Two missing aliases match; a missing alias never matches a present one.
        final boolean aliasesMatch = (fschema.alias == null)
                ? fother.alias == null
                : (fother.alias != null && fschema.alias.equals(fother.alias));
        if (!aliasesMatch) {
            return false;
        }
    }

    final boolean compareInner = !relaxInner && DataType.isSchemaType(fschema.type);
    if (!compareInner) {
        return true;
    }

    // Two absent inner schemas are deliberately viewed as equal, so the
    // schema-level comparison is skipped for them.
    if (fschema.schema == null && fother.schema == null) {
        return true;
    }

    // Inner schemas are always compared strictly; only the alias relaxation
    // is propagated.
    return Schema.equals(fschema.schema, fother.schema, false, relaxAlias);
}
 
Example 11
Source File: Coalesce.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Determines the output schema: a single field named "item" whose type is
 * derived from the input fields.
 *
 * <p>Schema types, complex types and unusable types are rejected. In strict
 * mode all fields must have the same type; otherwise differing types are
 * merged with {@code DataType.mergeType}, and a failed merge is rejected.
 * The resulting type is also stored in the instance properties under the
 * key "type".
 *
 * @param input schema of the arguments; must contain at least one field
 * @return schema with one field "item" of the resulting type
 * @throws RuntimeException if there are no fields, a field has an
 *         unsupported type, or the field types cannot be reconciled
 */
@Override
public Schema getOutputSchema(Schema input)
{
  if (input.getFields().size() == 0)
  {
    throw new RuntimeException("Expected at least one parameter");
  }

  Byte outputType = null;
  int pos = 0;
  for (FieldSchema field : input.getFields())
  {
    final byte fieldType = field.type;

    if (DataType.isSchemaType(fieldType))
    {
      throw new RuntimeException(String.format("Not supported on schema types.  Found %s in position %d.",DataType.findTypeName(fieldType),pos));
    }

    if (DataType.isComplex(fieldType))
    {
      throw new RuntimeException(String.format("Not supported on complex types.  Found %s in position %d.",DataType.findTypeName(fieldType),pos));
    }

    if (!DataType.isUsableType(fieldType))
    {
      throw new RuntimeException(String.format("Not a usable type.  Found %s in position %d.",DataType.findTypeName(fieldType),pos));
    }

    if (outputType == null)
    {
      // The first field fixes the initial output type.
      outputType = fieldType;
    }
    else if (!outputType.equals(fieldType))
    {
      if (!strict)
      {
        // Lazy mode: try to widen the output type to cover this field.
        final byte merged = DataType.mergeType(outputType, fieldType);
        if (merged == DataType.ERROR)
        {
          throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d, where output type is '%s', and types could not be merged.",
                                                   DataType.findTypeName(fieldType),pos,DataType.findTypeName((byte)outputType)));
        }
        outputType = merged;
      }
      else
      {
        throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d.  First element has type '%s'.  "
                                                 + "If you'd like to attempt merging types, use the '%s' option, as '%s' is the default.",
                                                 DataType.findTypeName(fieldType),pos,DataType.findTypeName((byte)outputType),LAZY_OPTION,STRICT_OPTION));
      }
    }

    pos++;
  }

  getInstanceProperties().put("type", outputType);

  return new Schema(new Schema.FieldSchema("item",outputType));
}