Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema#getField()

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema#getField(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReservoirSample.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the output schema: a single BAG field that reuses the inner schema
 * of the input bag.
 *
 * @param input schema of the UDF arguments; its first field must be a bag
 * @return a one-field schema of type BAG whose inner schema is that of the input bag
 * @throws RuntimeException if the first field is not a bag, or if a
 *         {@link FrontendException} occurs while reading or building the schema
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    Schema.FieldSchema inputFieldSchema = input.getField(0);

    if (inputFieldSchema.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as input");
    }

    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             inputFieldSchema.schema, DataType.BAG));
  } catch (FrontendException e) {
    // The cause travels with the rethrown exception, so a separate stack
    // trace dump to stderr would only duplicate the report.
    throw new RuntimeException(e);
  }
}
 
Example 2
Source File: SelectStringFieldByName.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the argument whose schema alias equals the name passed as the
 * first argument.
 *
 * <p>The first element of {@code input} is the field name to look for; the
 * remaining elements are the candidate values, matched by the alias of the
 * field at the same position in the input schema.
 *
 * @param input tuple holding the requested field name followed by the candidate fields
 * @return the value of the first candidate whose alias matches, or {@code null}
 *         when the requested name is null/empty or no alias matches
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws IOException if a tuple element or its field schema cannot be read
 */
@Override
public String exec(Tuple input) throws IOException
{
    if(input.size() < 2) {
        throw new IllegalArgumentException("Less then two arguments!");
    }

    // Check for null before calling toString(); otherwise a null first
    // argument fails with a NullPointerException instead of returning null.
    Object requestedName = input.get(0);
    if(requestedName == null) {
        return null;
    }

    String fieldNameToReturn = requestedName.toString();
    // Compare by content: reference comparison against "" only matches the
    // interned literal and misses other empty strings.
    if(fieldNameToReturn == null || fieldNameToReturn.isEmpty()) {
        return null;
    }

    Schema inputSchema = getInputSchema();
    String matchField = null;
    for(int i=1; i < input.size(); i++)
    {
        Schema.FieldSchema fieldSchema = inputSchema.getField(i);
        // Call equals on the known non-null name so a field without an alias
        // is skipped instead of raising a NullPointerException.
        if(fieldNameToReturn.equals(fieldSchema.alias)) {
            matchField = (String)input.get(i);
            break;
        }
    }
    return matchField;
}
 
Example 3
Source File: CountDistinctUpTo.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Validates that the input consists of exactly one bag field and reports the
 * output as a single INTEGER field named {@code CountDistinctUpTo}.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type INTEGER
 * @throws RuntimeException if the input does not have exactly one field, the
 *         field is not a bag, or the field cannot be read
 */
@Override
public Schema outputSchema(Schema input) {

	if (input.size() != 1) {
		throw new RuntimeException("Expected a single field of type bag, but found " + input.size() + " fields");
	}

	try {
		final FieldSchema bagCandidate = input.getField(0);
		final boolean isBag = bagCandidate.type == DataType.BAG;

		if (!isBag) {
			throw new RuntimeException("Expected a bag but got: " + DataType.findTypeName(bagCandidate.type));
		}
	} catch (FrontendException cause) {
		throw new RuntimeException(cause);
	}

	final FieldSchema countField = new FieldSchema("CountDistinctUpTo", DataType.INTEGER);
	return new Schema(countField);
}
 
Example 4
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Pig map field into a string-keyed map group.
 *
 * @param alias name passed to the resulting map type
 * @param fieldSchema Pig field schema of the map; its inner schema must hold exactly one field
 * @return an optional group containing one repeated group field (key, value)
 * @throws SchemaConversionException if the inner schema is missing, does not
 *         contain exactly one field, or that field cannot be retrieved
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
  final Schema valueSchema = fieldSchema.schema;
  final boolean hasSingleField = valueSchema != null && valueSchema.size() == 1;
  if (!hasSingleField) {
    throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
  }

  final FieldSchema valueField;
  try {
    valueField = valueSchema.getField(0);
  } catch (FrontendException fe) {
    throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
  }

  final Type valueType = convertWithName(valueField, "value");
  final String mapGroupName = name(valueField.alias, "map");
  return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, mapGroupName, valueType);
}
 
Example 5
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the fields of the input schema in order, looking for a byte array.
 *
 * @param func user function expression reported in the exception on failure
 * @param s input schema to scan
 * @return {@code true} as soon as a BYTEARRAY field is found; {@code false}
 *         if none is found or a {@code null} field schema is encountered first
 * @throws VisitorException if a field schema cannot be retrieved
 */
private boolean byteArrayFound(UserFuncExpression func, Schema s) throws VisitorException {
    int index = 0;
    while (index < s.size()) {
        final FieldSchema current;
        try {
            current = s.getField(index);
        } catch (FrontendException fee) {
            final int errCode = 1043;
            final String msg = "Unable to retrieve field schema.";
            throw new TypeCheckerException(func, msg, errCode, PigException.INPUT, fee);
        }

        // A null field schema stops the scan without a match.
        if (current == null) {
            return false;
        }
        if (current.type == DataType.BYTEARRAY) {
            return true;
        }
        index++;
    }
    return false;
}
 
Example 6
Source File: TokenizeME.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the output as a bag of single-field tuples, each holding a
 * CHARARRAY named {@code token}.
 *
 * @param input schema of the UDF arguments; the first field must be a CHARARRAY
 * @return a one-field schema of type BAG wrapping the token tuple schema
 * @throws RuntimeException if the first field is not a CHARARRAY, or if a
 *         {@link FrontendException} occurs while reading or building the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try
    {
        final Schema.FieldSchema textField = input.getField(0);
        final boolean isCharArray = textField.type == DataType.CHARARRAY;

        if (!isCharArray)
        {
            throw new RuntimeException("Expected a CHARARRAY as input, but got a " + textField.toString());
        }

        final Schema tokenTuple = new Schema();
        tokenTuple.add(new Schema.FieldSchema("token",DataType.CHARARRAY));

        final String outputAlias = getSchemaName(this.getClass().getName().toLowerCase(), input);
        final Schema.FieldSchema bagField = new Schema.FieldSchema(outputAlias, tokenTuple, DataType.BAG);
        return new Schema(bagField);
    }
    catch (FrontendException cause)
    {
        throw new RuntimeException(cause);
    }
}
 
Example 7
Source File: SentenceDetect.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the output as a bag of single-field tuples, each holding a
 * CHARARRAY named {@code sentence}.
 *
 * @param input schema of the UDF arguments; the first field must be a CHARARRAY
 * @return a one-field schema of type BAG wrapping the sentence tuple schema
 * @throws RuntimeException if the first field is not a CHARARRAY, or if a
 *         {@link FrontendException} occurs while reading or building the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try
    {
        final Schema.FieldSchema textField = input.getField(0);
        final boolean isCharArray = textField.type == DataType.CHARARRAY;

        if (!isCharArray)
        {
            throw new RuntimeException("Expected a CHARARRAY as input, but got a " + textField.toString());
        }

        final Schema sentenceTuple = new Schema();
        sentenceTuple.add(new Schema.FieldSchema("sentence",DataType.CHARARRAY));

        final String outputAlias = getSchemaName(this.getClass().getName().toLowerCase(), input);
        final Schema.FieldSchema bagField = new Schema.FieldSchema(outputAlias, sentenceTuple, DataType.BAG);
        return new Schema(bagField);
    }
    catch (FrontendException cause)
    {
        throw new RuntimeException(cause);
    }
}
 
Example 8
Source File: INVERSEMAP.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the first input field is a map and declares the output as an
 * unnamed MAP field.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type MAP with no alias
 * @throws RuntimeException if the first field is not a map or cannot be read
 */
@Override
public Schema outputSchema(Schema input) {
    final byte firstFieldType;
    try {
        firstFieldType = input.getField(0).type;
    } catch (FrontendException cause) {
        throw new RuntimeException(cause);
    }

    final boolean isMap = firstFieldType == DataType.MAP;
    if (!isMap) {
        throw new RuntimeException("Expected map, received schema " +DataType.findTypeName(firstFieldType));
    }
    return new Schema(new Schema.FieldSchema(null, DataType.MAP));
}
 
Example 9
Source File: VespaDocumentOperation.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one JSON field per two-element tuple in the given bag, using the
 * tuple's first element as a map key and its second element as the new value.
 *
 * <p>Only entries whose second element is itself a tuple are written. The
 * field is named {@code name{key}}. For the remove operation the body is an
 * object with a single {@code PARTIAL_UPDATE_REMOVE} entry set to 0;
 * otherwise the value is handed to {@code writePartialUpdate}.
 *
 * @param name base name of the field being updated
 * @param value the bag of (key, value) tuples; cast to {@link DataBag}
 * @param g JSON generator receiving the output
 * @param properties passed through to {@code writePartialUpdate}
 * @param schema schema of the bag, may be {@code null}
 * @param op passed through to {@code writePartialUpdate}
 * @param depth passed through to {@code writePartialUpdate}
 * @param operation name of the partial-update operation being written
 * @throws IOException if the schema cannot be read or the JSON cannot be written
 */
private static void writePartialUpdateAndRemoveMap(String name, Object value, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth, String operation) throws IOException {
    // Step into the first field's inner schema, then take the inner schema of
    // its second field as the schema of the values being written.
    if (schema != null) {
        schema = schema.getField(0).schema;
    }
    Schema valueSchema = null;
    if (schema != null) {
        valueSchema = schema.getField(1).schema;
    }

    // Expected shape: { (key, value), ... } where the first element of each
    // tuple addresses the map entry and the second is its new value.
    DataBag entries = (DataBag) value;
    for (Tuple entry : entries) {
        if (entry.size() != 2) {
            continue;
        }
        String mapKey = (String) entry.get(0);
        Object mapValue = entry.get(1);
        Byte valueType = DataType.findType(mapValue);
        if (valueType != DataType.TUPLE) {
            continue;
        }

        g.writeFieldName(name + "{" + mapKey + "}");
        if (!operation.equals(PARTIAL_UPDATE_REMOVE)) {
            writePartialUpdate(mapValue, valueType, g, name, properties, valueSchema, op, depth);
            continue;
        }
        g.writeStartObject();
        g.writeFieldName(PARTIAL_UPDATE_REMOVE);
        g.writeNumber(0);
        g.writeEndObject();
    }
}
 
Example 10
Source File: BagToString.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the input (a bag of tuples, optionally followed by a CHARARRAY
 * delimiter) and declares the output as an unnamed CHARARRAY field.
 *
 * @param inputSchema schema of the UDF arguments; one or two fields are accepted
 * @return a one-field schema of type CHARARRAY, or {@code null} if a
 *         {@link FrontendException} occurs while reading the schema
 * @throws RuntimeException if the input schema is null, has the wrong number
 *         of fields, or the fields do not have the expected types
 */
@Override
public Schema outputSchema(Schema inputSchema) {
	try {
		if ((inputSchema == null) || ((inputSchema.size() != 1) && (inputSchema.size() != 2))) {
			throw new RuntimeException("Expecting 2 inputs, found: " + 
					((inputSchema == null) ? 0 : inputSchema.size()));
		}

		FieldSchema inputFieldSchema = inputSchema.getField(0);
		if (inputFieldSchema.type != DataType.BAG) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found data type: " + 
					DataType.findTypeName(inputFieldSchema.type));
		}

		// first field in the bag schema; a bag without an inner schema is
		// treated like a missing first field so it is rejected with the
		// descriptive error below instead of a bare NullPointerException
		Schema bagSchema = inputFieldSchema.schema;
		FieldSchema firstFieldSchema = (bagSchema == null) ? null : bagSchema.getField(0);
		if ((firstFieldSchema == null) || (firstFieldSchema.schema == null)
				|| firstFieldSchema.schema.size() < 1) {
			throw new RuntimeException("Expecting a bag and a delimeter, found: " + inputSchema);
		}

		if (firstFieldSchema.type != DataType.TUPLE) {
			throw new RuntimeException("Expecting a bag and a delimeter, found: " + inputSchema);
		}
		
		if (inputSchema.size() == 2) {
			FieldSchema secondInputFieldSchema = inputSchema.getField(1);

			if (secondInputFieldSchema.type != DataType.CHARARRAY) {
				throw new RuntimeException("Expecting a bag and a delimeter, found: " + inputSchema);
			}
		}

		return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
	} catch (FrontendException e) {
		// Best effort: the failure is reported on stderr and a null schema is
		// returned rather than propagating the exception.
		e.printStackTrace();
		return null;
	}
}
 
Example 11
Source File: TOKENIZE.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Declares the output as a bag of tuples, each holding a single CHARARRAY
 * field named {@code token}. The bag alias is derived from the alias of the
 * first input field.
 *
 * @param input schema of the UDF arguments; its first field's alias is used in the bag alias
 * @return a one-field schema of type BAG
 * @throws RuntimeException if a {@link FrontendException} occurs while
 *         building the schema; the original exception is attached as the cause
 */
@SuppressWarnings("deprecation")
@Override
public Schema outputSchema(Schema input) {
    
    try {
        Schema.FieldSchema tokenFs = new Schema.FieldSchema("token", 
                DataType.CHARARRAY); 
        Schema tupleSchema = new Schema(tokenFs);

        Schema.FieldSchema tupleFs;
        tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema,
                DataType.TUPLE);

        Schema bagSchema = new Schema(tupleFs);
        bagSchema.setTwoLevelAccessRequired(true);
        Schema.FieldSchema bagFs = new Schema.FieldSchema(
                    "bag_of_tokenTuples_from_" + input.getField(0).alias, bagSchema, DataType.BAG);
                
        return new Schema(bagFs); 
        
    } catch (FrontendException e) {
        // throwing RTE because
        //above schema creation is not expected to throw an exception
        // and also because superclass does not throw exception.
        // The cause is kept so the underlying failure stays diagnosable.
        throw new RuntimeException("Unable to compute TOKENIZE schema.", e);
    }   
}
 
Example 12
Source File: JsFunction.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a JavaScript array into a Pig bag, turning each array element
 * into a tuple via {@code jsToPigTuple}.
 *
 * <p>When the bag schema consists of a single TUPLE field, that field's
 * inner schema is used for the elements instead of the bag schema itself.
 *
 * @param array the JavaScript array to convert; its ids are cast to {@link Integer}
 * @param schema schema of the bag
 * @param depth current nesting depth, passed to the debug helpers
 * @return a bag containing one tuple per array id
 * @throws FrontendException if the schema cannot be read
 * @throws ExecException if a tuple conversion fails
 */
private DataBag jsToPigBag(Scriptable array, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Bag", array, schema);

    final boolean wrapsSingleTuple = schema.size() == 1 && schema.getField(0).type == DataType.TUPLE;
    if (wrapsSingleTuple) {
        schema = schema.getField(0).schema;
    }

    final List<Tuple> tuples = new ArrayList<Tuple>();
    final Object[] ids = array.getIds();
    for (int k = 0; k < ids.length; k++) {
        final int position = ((Integer) ids[k]).intValue();
        final Scriptable element = (Scriptable) array.get(position, null);
        tuples.add(jsToPigTuple(element, schema, depth + 1));
    }

    final DataBag converted = BagFactory.getInstance().newDefaultBag(tuples);
    debugReturn(depth, converted);
    return converted;
}
 
Example 13
Source File: JsFunction.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a JavaScript object into a Pig tuple, reading one property per
 * schema field (looked up by the field alias).
 *
 * <p>BAG, TUPLE and MAP fields are converted recursively; wrapped Java
 * objects are unwrapped; {@code Undefined} becomes {@code null}; anything
 * else is stored as is. Fields whose alias is not present on the object are
 * left unset and only reported at debug level.
 *
 * @param object the JavaScript object to convert
 * @param schema schema describing the tuple fields
 * @param depth current nesting depth, passed to the debug helpers
 * @return a tuple with one slot per schema field
 * @throws FrontendException if a field schema cannot be read
 * @throws ExecException if a value cannot be stored in the tuple
 */
private Tuple jsToPigTuple(Scriptable object, Schema schema, int depth) throws FrontendException, ExecException {
    debugConvertJSToPig(depth, "Tuple", object, schema);
    Tuple result = TupleFactory.getInstance().newTuple(schema.size());
    for (int pos = 0; pos < schema.size(); pos++) {
        FieldSchema column = schema.getField(pos);

        if (!object.has(column.alias, jsScriptEngine.getScope())) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("X( "+column.alias+" NOT FOUND");
            }
            continue;
        }

        Object raw = object.get(column.alias, object);
        Object converted;
        // Schema-driven conversions take precedence over the runtime type of the property.
        if (column.type == DataType.BAG) {
            converted = jsToPigBag((Scriptable)raw, column.schema, depth + 1);
        } else if (column.type == DataType.TUPLE) {
            converted = jsToPigTuple((Scriptable)raw, column.schema, depth + 1);
        } else if (column.type == DataType.MAP) {
            converted = jsToPigMap((Scriptable)raw, column.schema, depth + 1);
        } else if (raw instanceof NativeJavaObject) {
            converted = ((NativeJavaObject)raw).unwrap();
        } else {
            converted = (raw instanceof Undefined) ? null : raw;
        }
        result.set(pos, converted);
    }
    debugReturn(depth, result);
    return result;
}
 
Example 14
Source File: LSHFunc.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Fails fast when the first input field is not accepted by
 * {@code DataTypeUtil.isValidVector} for the configured dimension.
 *
 * @param input schema of the UDF arguments
 * @throws FrontendException if the first field cannot be read or is not a valid vector
 */
private void validateInputSchema(Schema input) throws FrontendException
{
  final FieldSchema firstField = input.getField(0);
  final boolean validVector = DataTypeUtil.isValidVector(firstField, getDimension());
  if (validVector)
  {
    return;
  }
  throw new FrontendException("Invalid vector element: Expected either a tuple or a bag, but found " + firstField);
}
 
Example 15
Source File: WeightedSample.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the input (a bag, an INT, and optionally an INT or LONG) and
 * declares the output as a bag reusing the inner schema of the input bag.
 *
 * @param input schema of the UDF arguments; two or three fields are accepted
 * @return a one-field schema of type BAG whose inner schema is that of the input bag
 * @throws RuntimeException if the number or types of the fields are not as
 *         expected, or if a {@link FrontendException} occurs while reading
 *         or building the schema
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    if (!(input.size() == 2 || input.size() == 3))
    {
      throw new RuntimeException("Expected input to have two or three fields");
    }
    
    Schema.FieldSchema inputFieldSchema = input.getField(0);

    // Error messages report the type name rather than the raw type byte,
    // which is meaningless to a script author.
    if (inputFieldSchema.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as first input, got: "+DataType.findTypeName(inputFieldSchema.type));
    }
    
    byte secondType = input.getField(1).type;
    if (secondType != DataType.INTEGER) {
      throw new RuntimeException("Expected an INT as second input, got: "+DataType.findTypeName(secondType));
    }      
    
    if (input.size() == 3) {
      byte thirdType = input.getField(2).type;
      if (!(thirdType == DataType.INTEGER || thirdType == DataType.LONG)) {
        // This check concerns the third argument; the message used to say "second".
        throw new RuntimeException("Expected an INT or LONG as third input, got: "+DataType.findTypeName(thirdType));
      }
    }
    
    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             inputFieldSchema.schema, DataType.BAG));    
  } catch (FrontendException e) {
    // The cause travels with the rethrown exception, so no separate stack
    // trace dump is needed.
    throw new RuntimeException(e);
  }
}
 
Example 16
Source File: CondEntropy.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that the input is a bag of two-field tuples and declares the
 * output as a single DOUBLE field.
 *
 * @param input schema of the UDF arguments; the first field must be a bag of tuples
 * @return a one-field schema of type DOUBLE
 * @throws RuntimeException if the input is not a bag, the bag does not
 *         contain a tuple, the tuple has no schema or does not have exactly
 *         two fields, or a {@link FrontendException} occurs
 */
@Override
public Schema outputSchema(Schema input)
{
    try {
        final Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG)
        {
          throw new RuntimeException("Expected a BAG as input");
        }

        final Schema.FieldSchema elementField = bagField.schema.getField(0);
        if (elementField.type != DataType.TUPLE)
        {
          throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                   DataType.findTypeName(elementField.type)));
        }

        final Schema pairSchema = elementField.schema;
        if(pairSchema == null) {
            throw new RuntimeException("The tuple of the input bag has no schema");
        }

        final List<Schema.FieldSchema> pairFields = pairSchema.getFields();
        final boolean hasTwoFields = pairFields != null && pairFields.size() == 2;
        if(!hasTwoFields) {
            throw new RuntimeException("The field schema of the input tuple is null or its size is not 2");
        }

        final String outputAlias = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(outputAlias, DataType.DOUBLE));
      } catch (FrontendException cause) {
        throw new RuntimeException(cause);
      }
 }
 
Example 17
Source File: UnorderedPairs.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that the input is a single bag of tuples and declares the output
 * as a bag of pairs {@code (elem1, elem2)}, each a copy of the input tuple schema.
 *
 * <p>Validation failures are reported as {@link RuntimeException}s. A missing
 * schema (null input, bag without inner schema, tuple without schema) or a
 * checked failure while building the result yields {@code null}.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type BAG, or {@code null} when the schema
 *         cannot be determined
 * @throws RuntimeException if the input does not have exactly one field, the
 *         field is not a bag, or the bag does not contain a tuple
 */
@Override
public Schema outputSchema(Schema input)
{
  // Unknown input schema: the output schema cannot be determined either.
  if (input == null)
  {
    return null;
  }

  try {
    if (input.size() != 1)
    {
      throw new RuntimeException("Expected input to have only a single field");
    }
    
    Schema.FieldSchema inputFieldSchema = input.getField(0);

    if (inputFieldSchema.type != DataType.BAG)
    {
      throw new RuntimeException("Expected a BAG as input");
    }
    
    Schema inputBagSchema = inputFieldSchema.schema;
    if (inputBagSchema == null)
    {
      return null;
    }

    Schema.FieldSchema elementField = inputBagSchema.getField(0);

    if (elementField.type != DataType.TUPLE)
    {
      throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(elementField.type)));
    }      

    if (elementField.schema == null)
    {
      return null;
    }
    
    Schema outputTupleSchema = new Schema();
    outputTupleSchema.add(new Schema.FieldSchema("elem1", elementField.schema.clone(), DataType.TUPLE));
    outputTupleSchema.add(new Schema.FieldSchema("elem2", elementField.schema.clone(), DataType.TUPLE));
    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             outputTupleSchema, 
                                             DataType.BAG));
  }
  catch (RuntimeException e) {
    // Let the validation errors above reach the caller; a blanket
    // catch (Exception) used to swallow them and return null instead.
    throw e;
  }
  catch (Exception e) {
    // Checked failures while reading or copying the schema keep the
    // original best-effort behavior of reporting an unknown schema.
    return null;
  }
}
 
Example 18
Source File: SummaryData.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a field by position without failing on a missing schema or an
 * index at or beyond the schema size.
 *
 * @param schema schema to read from, may be {@code null}
 * @param i position of the field
 * @return the field at position {@code i}, or {@code null} if the schema is
 *         {@code null} or {@code i} is not below its size
 * @throws RuntimeException wrapping the {@link FrontendException} if the lookup fails
 */
protected FieldSchema getField(Schema schema, int i) {
  final boolean unavailable = schema == null || i >= schema.size();
  if (unavailable) {
    return null;
  }
  try {
    return schema.getField(i);
  } catch (FrontendException cause) {
    throw new RuntimeException(cause);
  }
}
 
Example 19
Source File: AppendToBag.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Declares the output as a bag that reuses the inner schema of the first
 * input field.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type BAG, or {@code null} if a
 *         {@link FrontendException} occurs while reading or building the schema
 */
@Override
public Schema outputSchema(Schema input)
{
  try {
    final String outputAlias = getSchemaName(this.getClass().getName().toLowerCase(), input);
    final Schema bagContents = input.getField(0).schema;
    final Schema.FieldSchema bagField = new Schema.FieldSchema(outputAlias, bagContents, DataType.BAG);
    return new Schema(bagField);
  }
  catch (FrontendException ignored) {
    return null;
  }
}
 
Example 20
Source File: Top.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Declares the output as an unnamed bag that reuses the inner schema of the
 * third input field.
 *
 * @param input schema of the UDF arguments
 * @return a one-field schema of type BAG, or {@code null} when the input has
 *         fewer than three fields or any exception occurs
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() >= 3) {
            final Schema bagContents = input.getField(2).schema;
            final Schema.FieldSchema unnamedBag = new Schema.FieldSchema(null,
                    bagContents, DataType.BAG);
            return new Schema(unnamedBag);
        }
        return null;
    } catch (Exception ignored) {
        return null;
    }
}