Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema#getFields()

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema#getFields(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PhoenixHBaseLoaderIT.java    From phoenix with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a LOAD restricted to an explicit column list yields a Pig schema
 * holding exactly those columns, in the requested order, with the expected Pig types.
 *
 * @throws Exception if the DDL, the query registration or the schema dump fails
 */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {

    // Create a three-column table; only two of its columns are requested below.
    final String createTableSql = "CREATE TABLE " + TABLE_FULL_NAME
            + "  (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
    conn.createStatement().execute(createTableSql);

    final String requestedColumns = "ID,NAME";
    final String loadTemplate =
            "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');";
    pigServer.registerQuery(String.format(loadTemplate, TABLE_FULL_NAME, requestedColumns, zkQuorum));

    final List<FieldSchema> loadedFields = pigServer.dumpSchema("A").getFields();
    assertEquals(2, loadedFields.size());

    final FieldSchema idField = loadedFields.get(0);
    assertTrue(idField.alias.equalsIgnoreCase("ID"));
    assertTrue(idField.type == DataType.INTEGER);

    final FieldSchema nameField = loadedFields.get(1);
    assertTrue(nameField.alias.equalsIgnoreCase("NAME"));
    assertTrue(nameField.type == DataType.CHARARRAY);
}
 
Example 2
Source File: PigUtils.java    From elasticsearch-hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively walks {@code schema} and appends to {@code fields} one dotted path
 * for every leaf field (a field that carries no nested schema).
 *
 * @param schema      schema whose fields are visited
 * @param fields      output list receiving the resolved leaf names
 * @param fa          alias translator; applied to each alias and again to the final path
 * @param currentNode path accumulated so far, or {@code null} at the root
 */
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
    for (FieldSchema child : schema.getFields()) {
        // An un-aliased field contributes nothing to the path; it inherits the parent's.
        String path = currentNode;
        if (child.alias != null) {
            String translated = fa.toES(child.alias);
            path = (currentNode == null) ? translated : currentNode + "." + translated;
        }

        // Nested schema: descend instead of recording this level.
        if (child.schema != null) {
            addField(child.schema, fields, fa, path);
            continue;
        }

        if (!StringUtils.hasText(path)) {
            LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema);
        }
        if (path != null) {
            fields.add(fa.toES(path));
        }
    }
}
 
Example 3
Source File: ZipBags.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output schema: a single BAG field named "zipped" whose tuple schema
 * is the concatenation of the tuple fields found in every input bag.
 *
 * @param input schema of the UDF arguments; each field is expected to be a bag
 * @return schema containing the single "zipped" bag field
 * @throws RuntimeException if a bag or its inner tuple has no schema, if two tuple
 *         fields share an alias, or if the output schema cannot be constructed
 */
@Override
public Schema outputSchema(Schema input) {
    Schema bagTupleSchema = new Schema();
    Set<String> aliasSet = new HashSet<String>();
    for (FieldSchema bagField : input.getFields()) { // each field should be a bag
        if (bagField.schema == null) {
            throw new RuntimeException("Inner bag schemas are null");
        }
        for (FieldSchema innerBagTuple : bagField.schema.getFields()) {
            // Fail with a descriptive message instead of a bare NullPointerException
            // when the tuple inside the bag was declared without a schema.
            if (innerBagTuple.schema == null) {
                throw new RuntimeException("Inner bag tuple schemas are null");
            }
            for (FieldSchema tupleField : innerBagTuple.schema.getFields()) {
                // Set.add returns false when the alias was already seen.
                if (!aliasSet.add(tupleField.alias)) {
                    throw new RuntimeException("Duplicate field alias specified");
                }
                bagTupleSchema.add(tupleField);
            }
        }
    }
    try {
        return new Schema(new FieldSchema("zipped", bagTupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 4
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Inserts a cast on every UDF argument whose type differs from the type of the
 * corresponding field in the target schema.
 *
 * @param func      the UDF expression whose arguments are cast
 * @param fromSch   schema describing the actual argument types
 * @param toSch     schema describing the expected argument types
 * @param toSchType when {@code VARARG}, positions past the end of {@code toSch}
 *                  reuse its last field
 * @throws FrontendException declared by the method; presumably raised by the
 *                           translation or cast-insertion helpers
 */
private void insertCastsForUDF(UserFuncExpression func, Schema fromSch, Schema toSch, SchemaType toSchType)
throws FrontendException {
    List<FieldSchema> sourceFields = fromSch.getFields();
    List<FieldSchema> targetFields = toSch.getFields();
    List<LogicalExpression> arguments = func.getArguments();
    int position = 0;
    for (FieldSchema source : sourceFields) {
        // Once past the declared fields of a vararg schema, keep using the last one.
        FieldSchema target;
        if ((toSchType == SchemaType.VARARG) && position >= targetFields.size()) {
            target = targetFields.get(targetFields.size() - 1);
        } else {
            target = targetFields.get(position);
        }
        if (source.type != target.type) {
            insertCast(func, Util.translateFieldSchema(target), arguments.get(position));
        }
        position++;
    }
}
 
Example 5
Source File: TOBAG.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the output bag schema from the argument schema. When every argument
 * has the same type and the same nested schema, the bag's tuple carries one
 * field of that type; otherwise the result of
 * {@code Schema.generateNestedSchema(DataType.BAG, DataType.NULL)} is returned.
 *
 * @param inputSch schema of the arguments; may be {@code null}
 * @return schema with a single BAG field
 */
@Override
public Schema outputSchema(Schema inputSch) {
    // DataType.ERROR doubles as the "nothing recorded yet" marker.
    byte commonType = DataType.ERROR;
    Schema commonSchema = null;
    if (inputSch != null) {
        for (FieldSchema argument : inputSch.getFields()) {
            if (commonType == DataType.ERROR) {
                commonType = argument.type;
                commonSchema = argument.schema;
                continue;
            }
            boolean matchesRecorded =
                    commonType == argument.type && nullEquals(commonSchema, argument.schema);
            if (!matchesRecorded) {
                // Arguments disagree: give up on a typed inner schema.
                commonType = DataType.ERROR;
                break;
            }
        }
    }
    try {
        if (commonType == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema elementField = new Schema.FieldSchema(null, commonSchema, commonType);
        Schema tupleSchema = new Schema(elementField);
        return new Schema(new FieldSchema(null, tupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        // Not expected to happen.
        throw new RuntimeException("Bug : exception thrown while " +
                "creating output schema for TOBAG udf", e);
    }

}
 
Example 6
Source File: AliasableEvalFunc.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Records, for each aliased field of {@code tupleSchema}, its prefixed alias and
 * its position within that schema, then recurses into nested schemas using the
 * field's prefixed alias as the new prefix.
 *
 * @param aliases     map receiving prefixed alias to position entries
 * @param tupleSchema schema whose fields are scanned
 * @param prefix      prefix handed to {@code getPrefixedAliasName} for each field
 */
private void constructFieldAliases(Map<String, Integer> aliases, Schema tupleSchema, String prefix)
{
  int index = 0;
  for (Schema.FieldSchema column : tupleSchema.getFields()) {
    final String qualifiedName = getPrefixedAliasName(prefix, column.alias);

    // Fields with no alias, or with the literal alias "null", are not registered.
    final boolean hasUsableAlias = column.alias != null && !"null".equals(column.alias);
    if (hasUsableAlias) {
      aliases.put(qualifiedName, index);
      log.debug("In instance: "+getInstanceName()+", stored alias " + qualifiedName + " as position " + index);
    }

    if (column.schema != null) {
      constructFieldAliases(aliases, column.schema, qualifiedName);
    }
    index++;
  }
}
 
Example 7
Source File: SchemaTupleFactory.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether a SchemaTuple implementation can be generated for a Schema.
 * A {@code null} or empty schema is rejected, as is any schema containing a
 * TUPLE field whose nested schema is itself rejected by this method.
 * NOTE(review): the previous comment stated that bags and maps are not
 * supported, but no check for those types is visible in this method - confirm.
 *
 * @param   s the Schema to inspect
 * @return  true if the schema is generatable, false otherwise
 */
public static boolean isGeneratable(Schema s) {
    final boolean nothingToGenerate = (s == null) || (s.size() == 0);
    if (nothingToGenerate) {
        return false;
    }

    for (Schema.FieldSchema field : s.getFields()) {
        if (field.type != DataType.TUPLE) {
            continue;
        }
        // Nested tuples must be generatable as well.
        if (!isGeneratable(field.schema)) {
            return false;
        }
    }

    return true;
}
 
Example 8
Source File: SchemaTupleFrontend.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the alias of every field in {@code s} to {@code null}, descending into
 * nested schemas. The schema is modified in place.
 *
 * @param s schema to strip
 */
private static void stripAliases(Schema s) {
    for (Schema.FieldSchema field : s.getFields()) {
        field.alias = null;
        final Schema nested = field.schema;
        if (nested == null) {
            continue;
        }
        stripAliases(nested);
    }
}
 
Example 9
Source File: TOBAG2.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the output bag schema from the argument schema. When every argument
 * has the same type and the same nested schema, the bag's tuple carries one
 * field of that type; otherwise the result of
 * {@code Schema.generateNestedSchema(DataType.BAG, DataType.NULL)} is returned.
 *
 * @param inputSch schema of the arguments; may be {@code null}
 * @return schema with a single BAG field
 */
@Override
public Schema outputSchema(Schema inputSch) {
    // DataType.ERROR doubles as the "nothing recorded yet" marker.
    byte sharedType = DataType.ERROR;
    Schema sharedSchema = null;
    if (inputSch != null) {
        for (FieldSchema argument : inputSch.getFields()) {
            if (sharedType == DataType.ERROR) {
                sharedType = argument.type;
                sharedSchema = argument.schema;
                continue;
            }
            boolean matchesRecorded =
                    sharedType == argument.type && nullEquals(sharedSchema, argument.schema);
            if (!matchesRecorded) {
                // Arguments disagree: give up on a typed inner schema.
                sharedType = DataType.ERROR;
                break;
            }
        }
    }
    try {
        if (sharedType == DataType.ERROR) {
            return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        }
        FieldSchema elementField = new Schema.FieldSchema(null, sharedSchema, sharedType);
        Schema tupleSchema = new Schema(elementField);
        return new Schema(new FieldSchema(null, tupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        // Not expected to happen.
        throw new RuntimeException("Bug : exception thrown while " +
                "creating output schema for TOBAG udf", e);
    }

}
 
Example 10
Source File: RubySchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * This allows the users to set an index or a range of values to
 * a specified RubySchema. The first argument must be a Fixnum or Range,
 * and the second argument may optionally be a Fixnum. The given index
 * (or range of indices) will be replaced by a RubySchema instantiated
 * based on the remaining arguments.
 * <p>
 * Two leading Fixnums are treated as a half-open interval (start inclusive,
 * end exclusive); a single Fixnum replaces exactly one field; a Range is
 * treated as inclusive of its maximum.
 *
 * @param context the context the method is being executed in
 * @param args    a varargs which has to be at least length two.
 * @return        the RubySchema that was added
 * @throws RuntimeException if the first argument is neither a Fixnum nor a
 *         Range, or if no schema argument remains after the index arguments
 */
@JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true)
public RubySchema set(ThreadContext context, IRubyObject[] args) {
    IRubyObject arg1 = args[0];
    IRubyObject arg2 = args[1];
    // By default everything after the first argument describes the new schema.
    IRubyObject[] arg3 = Arrays.copyOfRange(args, 1, args.length);
    Schema s = internalSchema;
    Ruby runtime = context.getRuntime();
    List<Schema.FieldSchema> lfs = s.getFields();
    int min, max;
    if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = (int)((RubyFixnum)arg2).getLongValue();
        // Both leading arguments were indices, so the schema starts one later.
        arg3 = Arrays.copyOfRange(args, 2, args.length);
    } else if (arg1 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = min + 1;
    } else if (arg1 instanceof RubyRange) {
        min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue();
        max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1;
    } else {
        throw new RuntimeException("Bad arguments given to set function: ( " + arg1.toString() + " , " + arg2.toString()+ " )");
    }
    // Validate before touching the field list so that a rejected call does not
    // leave the schema with fields removed and nothing inserted in their place.
    if (arg3.length == 0) {
        throw new RuntimeException("Must have schema argument for []=");
    }
    // Removing at the same index repeatedly deletes the whole [min, max) run,
    // because later elements shift down after each removal.
    for (int i = min; i < max; i++) {
        lfs.remove(min);
    }
    RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(arg3);
    for (Schema.FieldSchema fs : rs.getInternalSchema().getFields()) {
        lfs.add(min++, fs);
    }
    RubySchema.fixSchemaNames(internalSchema);
    return rs;
}
 
Example 11
Source File: TestResourceSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Searches {@code s}, including nested schemas, for a BAG field whose schema
 * reports {@code isTwoLevelAccessRequired()}.
 *
 * @param s schema to search; {@code null} yields {@code false}
 * @return true as soon as such a bag is found, false otherwise
 */
private boolean CheckTwoLevelAccess(Schema s) {
    if (s == null) {
        return false;
    }
    for (Schema.FieldSchema field : s.getFields()) {
        final Schema nested = field.schema;
        final boolean bagNeedsTwoLevel = field.type == DataType.BAG
                && nested != null
                && nested.isTwoLevelAccessRequired();
        // Either this field qualifies directly, or something beneath it does.
        if (bagNeedsTwoLevel || CheckTwoLevelAccess(nested)) {
            return true;
        }
    }
    return false;
}
 
Example 12
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites {@code schema} in place, changing every BOOLEAN field to INTEGER,
 * and applies the same rewrite to each field's nested schema.
 *
 * @param schema schema to rewrite; {@code null} is ignored
 */
private void convertToElephantBirdCompatibleSchema(Schema schema) {
  if (schema != null) {
    for (FieldSchema field : schema.getFields()) {
      if (DataType.BOOLEAN == field.type) {
        field.type = DataType.INTEGER;
      }
      // The recursive call handles a null nested schema itself.
      convertToElephantBirdCompatibleSchema(field.schema);
    }
  }
}
 
Example 13
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts each field of {@code pigSchema} via {@code convert(field, index)}
 * and returns the results in field order.
 *
 * @param pigSchema schema whose fields are converted
 * @return array with one converted type per field
 */
private Type[] convertTypes(Schema pigSchema) {
  List<FieldSchema> pigFields = pigSchema.getFields();
  Type[] converted = new Type[pigFields.size()];
  int index = 0;
  for (FieldSchema pigField : pigFields) {
    converted[index] = convert(pigField, index);
    index++;
  }
  return converted;
}
 
Example 14
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Filters {@code schemaToFilter} by field name: for each requested Pig field
 * whose resolved name exists in {@code schemaToFilter}, the filtered type is
 * added to the result. The name is resolved through {@code name(alias, "field_" + i)}.
 *
 * @param schemaToFilter     group type to pick fields from
 * @param requestedPigSchema Pig schema listing the requested fields
 * @param requiredFieldsList not read by this implementation
 * @return filtered types, in the order of the requested fields that matched
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<Type> kept = new ArrayList<Type>();
  int position = 0;
  for (FieldSchema requested : requestedPigSchema.getFields()) {
    String columnName = name(requested.alias, "field_" + position);
    position++;
    if (!schemaToFilter.containsField(columnName)) {
      continue;
    }
    kept.add(filter(schemaToFilter.getType(columnName), requested));
  }
  return kept;
}
 
Example 15
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Filters {@code schemaToFilter} by field position. With no required-field
 * list, each Pig field is paired with its own position; otherwise each required
 * field is looked up by alias and paired with its declared index. Pairs whose
 * index is not below {@code schemaToFilter.getFieldCount()} are skipped.
 *
 * @param schemaToFilter     group type to pick fields from
 * @param pigSchema          Pig schema used to resolve fields by alias
 * @param requiredFieldsList required fields, or {@code null} for all fields
 * @return filtered types for every pair that was in range
 * @throws RuntimeException wrapping any {@code FrontendException} raised during lookup
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
  List<Type> kept = new ArrayList<Type>();
  List<Pair<FieldSchema,Integer>> candidates = new ArrayList<Pair<FieldSchema,Integer>>();

  try {
    if (requiredFieldsList != null) {
      for (RequiredField required : requiredFieldsList.getFields()) {
        FieldSchema byAlias = pigSchema.getField(required.getAlias());
        candidates.add(new Pair<FieldSchema, Integer>(byAlias, required.getIndex()));
      }
    } else {
      int position = 0;
      for (FieldSchema field : pigSchema.getFields()) {
        candidates.add(new Pair<FieldSchema, Integer>(field, position));
        position++;
      }
    }

    for (Pair<FieldSchema, Integer> candidate : candidates) {
      // The alias lookup happens before the range check, as in the original flow.
      FieldSchema resolved = pigSchema.getField(candidate.first.alias);
      if (candidate.second >= schemaToFilter.getFieldCount()) {
        continue;
      }
      Type targetType = schemaToFilter.getFields().get(candidate.second);
      kept.add(filter(targetType, resolved));
    }
  } catch (FrontendException e) {
      throw new RuntimeException("Failed to filter requested fields", e);
  }
  return kept;
}
 
Example 16
Source File: TestSchemaTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that, for every field of the tuple's schema, calling the typed getter
 * matching the field's type throws {@code FieldIsNullException}.
 *
 * @param st the SchemaTuple under test
 * @throws ExecException declared by the method; presumably raised by the getters
 */
private void checkNullGetThrowsError(SchemaTuple<?> st) throws ExecException {
    int position = 0;
    for (Schema.FieldSchema field : st.getSchema().getFields()) {
        boolean sawNullError;
        try {
            // Dispatch to the getter that corresponds to the declared field type.
            switch (field.type) {
            case DataType.BIGDECIMAL: st.getBigDecimal(position); break;
            case DataType.BIGINTEGER: st.getBigInteger(position); break;
            case DataType.BOOLEAN: st.getBoolean(position); break;
            case DataType.BYTEARRAY: st.getBytes(position); break;
            case DataType.CHARARRAY: st.getString(position); break;
            case DataType.INTEGER: st.getInt(position); break;
            case DataType.LONG: st.getLong(position); break;
            case DataType.FLOAT: st.getFloat(position); break;
            case DataType.DOUBLE: st.getDouble(position); break;
            case DataType.DATETIME: st.getDateTime(position); break;
            case DataType.TUPLE: st.getTuple(position); break;
            case DataType.BAG: st.getDataBag(position); break;
            case DataType.MAP: st.getMap(position); break;
            default: throw new RuntimeException("Unsupported FieldSchema in SchemaTuple: " + field);
            }
            // Getter returned normally: the expected exception did not occur.
            sawNullError = false;
        } catch (FieldIsNullException expected) {
            sawNullError = true;
        }
        assertTrue(sawNullError);
        position++;
    }
}
 
Example 17
Source File: PhoenixHBaseLoaderIT.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the schema returned when a SQL SELECT query is given as part of LOAD.
 * The query selects three of the table's four columns; the resulting Pig schema
 * must list exactly those three with the expected Pig types.
 *
 * @throws Exception if the DDL, the query registration or the schema dump fails
 */
@Test
public void testSchemaForQuery() throws Exception {

    // Create the table. The statement is built by plain concatenation: it contains
    // no format specifiers, so routing it through String.format was redundant and
    // would misbehave if the table name ever contained a '%'.
    final String ddl = "CREATE TABLE " + TABLE_FULL_NAME
            + "  (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE"
            + "  CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))\n";
    conn.createStatement().execute(ddl);

    // SQL query for LOAD.
    final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE_FULL_NAME;
    pigServer.registerQuery(String.format(
            "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            sqlQuery, zkQuorum));

    // Assert the schema.
    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(3, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string"));
    assertTrue(fields.get(0).type == DataType.CHARARRAY);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer"));
    assertTrue(fields.get(1).type == DataType.INTEGER);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double"));
    assertTrue(fields.get(2).type == DataType.DOUBLE);
}
 
Example 18
Source File: WeightedReservoirSample.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the input schema and returns the output schema: a single BAG field
 * that reuses the input bag's inner schema and is named via {@code getSchemaName}.
 * <p>
 * The first input field must be a BAG containing a TUPLE with a schema, and the
 * tuple field at {@code weightIdx} must be INTEGER, LONG, FLOAT or DOUBLE.
 *
 * @param input schema of the UDF arguments
 * @return schema containing the single output bag field
 * @throws RuntimeException if any of the validations above fails, or wrapping a
 *         {@code FrontendException} raised while reading or building schemas
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    Schema.FieldSchema inputFieldSchema = input.getField(0);

    if (inputFieldSchema.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as input");
    }

    Schema inputBagSchema = inputFieldSchema.schema;

    // A bag declared without an inner schema would otherwise fail below with a
    // bare NullPointerException.
    if (inputBagSchema == null) {
        throw new RuntimeException("The input bag has no schema");
    }

    Schema.FieldSchema bagTupleFieldSchema = inputBagSchema.getField(0);

    if (bagTupleFieldSchema.type != DataType.TUPLE)
    {
        throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(bagTupleFieldSchema.type)));
    }

    Schema tupleSchema = bagTupleFieldSchema.schema;

    if(tupleSchema == null) {
        throw new RuntimeException("The tuple of input bag has no schema");
    }

    List<Schema.FieldSchema> fieldSchemaList = tupleSchema.getFields();

    // Math.max(0, ...) also rejects an empty tuple when the index is not positive.
    if(fieldSchemaList == null || fieldSchemaList.size() <= Math.max(0, this.weightIdx)) {
        throw new RuntimeException("The field schema of the input tuple is null " +
                                   "or the tuple size is no more than the weight field index: "
                                   + this.weightIdx);
    }

    byte weightType = fieldSchemaList.get(this.weightIdx).type;

    if(weightType != DataType.INTEGER &&
       weightType != DataType.LONG &&
       weightType != DataType.FLOAT &&
       weightType != DataType.DOUBLE)
    {
        String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                               DataType.findTypeName(DataType.LONG),
                                               DataType.findTypeName(DataType.FLOAT),
                                               DataType.findTypeName(DataType.DOUBLE)};
        throw new RuntimeException("Expect the type of the weight field of the input tuple to be of (" +
                java.util.Arrays.toString(expectedTypes) + "), but instead found (" +
                DataType.findTypeName(weightType) + "), weight field: " +
                this.weightIdx);
    }

    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             inputFieldSchema.schema, DataType.BAG));
  } catch (FrontendException e) {
    // The cause travels with the rethrown exception; no separate stack dump needed.
    throw new RuntimeException(e);
  }
}
 
Example 19
Source File: Coalesce.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Determines the single output type for the given arguments and returns a
 * schema with one field named "item" of that type. The chosen type is also
 * stored in the instance properties under the key "type".
 * <p>
 * Schema types, complex types and unusable types are rejected. When argument
 * types differ, the {@code strict} flag decides between failing immediately and
 * attempting {@code DataType.mergeType}.
 *
 * @param input schema of the arguments; must contain at least one field
 * @return schema with the single "item" field
 * @throws RuntimeException if there are no arguments, an argument type is
 *         unsupported, or the types differ and cannot be reconciled
 */
@Override
public Schema getOutputSchema(Schema input)
{
  if (input.getFields().size() == 0)
  {
    throw new RuntimeException("Expected at least one parameter");
  }

  Byte resolvedType = null;
  int position = 0;
  for (FieldSchema argument : input.getFields())
  {
    // Reject every kind of type this function cannot operate on.
    if (DataType.isSchemaType(argument.type))
    {
      throw new RuntimeException(String.format("Not supported on schema types.  Found %s in position %d.",DataType.findTypeName(argument.type),position));
    }
    if (DataType.isComplex(argument.type))
    {
      throw new RuntimeException(String.format("Not supported on complex types.  Found %s in position %d.",DataType.findTypeName(argument.type),position));
    }
    if (!DataType.isUsableType(argument.type))
    {
      throw new RuntimeException(String.format("Not a usable type.  Found %s in position %d.",DataType.findTypeName(argument.type),position));
    }

    if (resolvedType == null)
    {
      // First argument fixes the initial output type.
      resolvedType = argument.type;
    }
    else if (!resolvedType.equals(argument.type))
    {
      if (strict)
      {
        throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d.  First element has type '%s'.  "
                                                 + "If you'd like to attempt merging types, use the '%s' option, as '%s' is the default.",
                                                 DataType.findTypeName(argument.type),position,DataType.findTypeName((byte)resolvedType),LAZY_OPTION,STRICT_OPTION));
      }

      // Non-strict mode: try to reconcile the two types.
      byte mergedType = DataType.mergeType(resolvedType, argument.type);
      if (mergedType == DataType.ERROR)
      {
        throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d, where output type is '%s', and types could not be merged.",
                                                 DataType.findTypeName(argument.type),position,DataType.findTypeName((byte)resolvedType)));
      }
      resolvedType = mergedType;
    }

    position++;
  }

  getInstanceProperties().put("type", resolvedType);

  return new Schema(new Schema.FieldSchema("item",resolvedType));
}
 
Example 20
Source File: TestProjectRange.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Clears the alias of each top-level field of {@code schema}, in place.
 * Nested schemas are not visited.
 *
 * @param schema schema whose field aliases are reset to {@code null}
 */
private void setAliasesToNull(Schema schema) {
   for (FieldSchema topLevelField : schema.getFields()) {
       topLevelField.alias = null;
   }
}