org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema Java Examples

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a schema containing only the requested fields, recursing into requested sub-fields.
 *
 * @param schema schema in which each required field's alias is looked up
 * @param fieldList the fields to keep
 * @return a new schema holding a clone of every requested field, or {@code null} if the
 *         projection of a nested schema comes back {@code null}
 * @throws FrontendException if a field schema cannot be cloned, or propagated from the schema lookup
 */
private Schema getSchemaFromRequiredFieldList(Schema schema, List<RequiredField> fieldList)
    throws FrontendException {
  Schema projected = new Schema();
  for (RequiredField required : fieldList) {
    FieldSchema copy;
    try {
      // work on a clone so the nested schema can be replaced without touching the source schema
      copy = schema.getField(required.getAlias()).clone();
    } catch (CloneNotSupportedException e) {
      throw new FrontendException("Clone not supported for the fieldschema", e);
    }

    List<RequiredField> subFields = required.getSubFields();
    if (subFields != null) {
      // narrow the nested schema down to the requested sub-fields
      Schema nested = getSchemaFromRequiredFieldList(copy.schema, subFields);
      if (nested == null) {
        return null;
      }
      copy.schema = nested;
    }
    projected.add(copy);
  }
  return projected;
}
 
Example #2
Source File: PhoenixHBaseLoaderIT.java    From phoenix with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the schema reported by Pig when the LOAD statement names specific columns of a table.
 * @throws Exception
 */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {
    // create a table with three columns
    conn.createStatement().execute(
            "CREATE TABLE " + TABLE_FULL_NAME
            + "  (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ");

    // load only two of the three columns
    final String loadQuery = String.format(
            "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE_FULL_NAME, "ID,NAME", zkQuorum);
    pigServer.registerQuery(loadQuery);

    final List<FieldSchema> fields = pigServer.dumpSchema("A").getFields();
    assertEquals(2, fields.size());

    final FieldSchema idField = fields.get(0);
    assertTrue(idField.alias.equalsIgnoreCase("ID"));
    assertTrue(idField.type == DataType.INTEGER);

    final FieldSchema nameField = fields.get(1);
    assertTrue(nameField.alias.equalsIgnoreCase("NAME"));
    assertTrue(nameField.type == DataType.CHARARRAY);
}
 
Example #3
Source File: PigStreamingUDF.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes the byte range {@code [startIndex, endIndex]} of {@code buf} into a tuple,
 * cutting the range into fields wherever a field delimiter is detected.
 *
 * @param fs field schema whose inner schema describes the tuple's fields
 * @param buf serialized bytes
 * @param startIndex first index (inclusive) to scan
 * @param endIndex last index (inclusive) to scan
 * @return a tuple built from the deserialized field values
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private Tuple deserializeTuple(FieldSchema fs, byte[] buf, int startIndex, int endIndex) throws IOException {
    final Schema innerSchema = fs.schema;
    final ArrayList<Object> values = new ArrayList<Object>(innerSchema.size());

    int nesting = 0;
    int fieldIdx = 0;
    int segmentStart = startIndex;

    int pos = startIndex;
    while (pos <= endIndex) {
        nesting = DELIMS.updateDepth(buf, nesting, pos);
        if (StreamingDelimiters.isDelimiter(DELIMS.getFieldDelim(), buf, pos, nesting, endIndex)) {
            // the field's bytes end just before the delimiter position
            values.add(deserialize(innerSchema.getField(fieldIdx), buf, segmentStart, pos - 1));
            fieldIdx++;
            // NOTE(review): presumably skips a three-byte field delimiter - confirm against StreamingDelimiters
            segmentStart = pos + 3;
        }
        pos++;
    }
    return tupleFactory.newTupleNoCopy(values);
}
 
Example #4
Source File: PhoenixHBaseLoaderIT.java    From phoenix with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the schema reported by Pig for a table whose columns map to several Pig data types.
 * @throws Exception
 */
@Test
public void testSchemaForTable() throws Exception {
    final String ddl = String.format("CREATE TABLE %s "
            + "  (a_string varchar not null, a_binary varbinary not null, a_integer integer, cf1.a_float float"
            + "  CONSTRAINT pk PRIMARY KEY (a_string, a_binary))\n", TABLE_FULL_NAME);
    conn.createStatement().execute(ddl);

    final String loadQuery = String.format(
            "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE_FULL_NAME,
            zkQuorum);
    pigServer.registerQuery(loadQuery);

    // expected alias / Pig type per column, in table order
    final String[] expectedAliases = {"a_string", "a_binary", "a_integer", "a_float"};
    final byte[] expectedTypes = {DataType.CHARARRAY, DataType.BYTEARRAY, DataType.INTEGER, DataType.FLOAT};

    final List<FieldSchema> fields = pigServer.dumpSchema("A").getFields();
    assertEquals(4, fields.size());
    for (int i = 0; i < expectedAliases.length; i++) {
        assertTrue(fields.get(i).alias.equalsIgnoreCase(expectedAliases[i]));
        assertTrue(fields.get(i).type == expectedTypes[i]);
    }
}
 
Example #5
Source File: SchemaUtils.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link ColumnType} describing a Pig field.
 *
 * The column takes the given name and a type name derived from the field's Pig type.
 * When the field carries a nested schema, it is converted and attached as the column schema.
 *
 * @param colName name to give the column
 * @param colSchema Pig field schema to convert
 * @return the populated column type
 * @throws RuntimeException if the nested schema cannot be converted; the original
 *         {@link FrontendException} is kept as the cause
 */
public static ColumnType coltypeFromFieldSchema(String colName, FieldSchema colSchema)
{
    ColumnType t = new ColumnType();
    t.setName(colName);
    t.setType(convertoRCFTypeName(DataType.findTypeName(colSchema.type)));
    if (colSchema.schema != null)
    {
        try
        {
            t.setColumnSchema(convertToBlockSchema(colSchema.schema));
        }
        catch (FrontendException e)
        {
            // Fail loudly: returning a column with its nested schema silently missing
            // would hand callers an incomplete type description.
            throw new RuntimeException("Cannot convert nested schema of column " + colName, e);
        }
    }
    return t;
}
 
Example #6
Source File: SchemaUtils.java    From Cubert with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a block schema into a Pig schema, column by column, recursing into nested column schemas.
 *
 * @param blockSchema the block schema to convert
 * @return a Pig schema with one field per column
 * @throws FrontendException if a field schema with a nested schema cannot be constructed
 */
public static Schema convertFromBlockSchema(BlockSchema blockSchema) throws FrontendException
{
    List<FieldSchema> pigFields = new ArrayList<FieldSchema>();
    for (int col = 0; col < blockSchema.getNumColumns(); col++)
    {
        ColumnType column = blockSchema.getColumnType(col);
        byte pigType = convertToPigType(column.getType().toString());

        FieldSchema converted;
        if (column.getColumnSchema() == null)
        {
            converted = new FieldSchema(column.getName(), pigType);
        }
        else
        {
            // nested column: convert the inner schema first, then wrap it
            Schema inner = convertFromBlockSchema(column.getColumnSchema());
            converted = new FieldSchema(column.getName(), inner, pigType);
        }
        pigFields.add(converted);
    }
    return new Schema(pigFields);
}
 
Example #7
Source File: ScorePMML_AuditTest.java    From Surus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Pig input schema for the audit data set.
 *
 * @return a schema with one field per audit column, in declaration order
 * @throws FrontendException declared by the signature
 */
private Schema buildAuditInputSchema() throws FrontendException {
    // column names and their Pig types, kept in matching order
    final String[] names = {
        "id", "age", "employment", "education", "marital", "occupation", "income",
        "gender", "deductions", "hours", "ignore_accounts", "risk_adjustment", "target_adjusted"
    };
    final byte[] types = {
        DataType.LONG, DataType.INTEGER, DataType.CHARARRAY, DataType.CHARARRAY, DataType.CHARARRAY,
        DataType.CHARARRAY, DataType.DOUBLE, DataType.CHARARRAY, DataType.DOUBLE, DataType.INTEGER,
        DataType.CHARARRAY, DataType.INTEGER, DataType.INTEGER
    };

    List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
    for (int i = 0; i < names.length; i++) {
        fieldSchemas.add(new Schema.FieldSchema(names[i], types[i]));
    }
    return new Schema(fieldSchemas);
}
 
Example #8
Source File: PigUtils.java    From elasticsearch-hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Walks a Pig schema depth-first and appends the dotted name of every leaf field to {@code fields}.
 *
 * @param schema schema to walk
 * @param fields output list receiving the leaf field names
 * @param fa alias mapping applied to each alias and again to the final dotted name
 * @param currentNode dotted prefix accumulated so far, or {@code null} at the top level
 */
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
    for (FieldSchema field : schema.getFields()) {
        // a field without an alias contributes nothing to the path
        String path = currentNode;
        if (field.alias != null) {
            String mapped = fa.toES(field.alias);
            path = (currentNode == null) ? mapped : currentNode + "." + mapped;
        }

        if (field.schema != null) {
            // nested field: descend with the extended path
            addField(field.schema, fields, fa, path);
            continue;
        }

        if (!StringUtils.hasText(path)) {
            LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema);
        }
        if (path != null) {
            fields.add(fa.toES(path));
        }
    }
}
 
Example #9
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes a columnar buffer using a {@link ColumnarSerDe} configured from a schema string.
 *
 * @param buff the column data to deserialize
 * @param schema schema string from which column names and types are parsed
 * @return the deserialized struct
 * @throws SerDeException declared by the signature; propagated from the SerDe calls
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    final Pattern columnNamePattern = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    final List<String> columnTypes = HiveRCSchemaUtil.parseSchemaTypes(schema);
    final List<String> columnNames = HiveRCSchemaUtil.parseSchema(columnNamePattern, schema);

    // Pig field schemas for the parsed columns; not read again within this method
    final List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(columnNames.size());
    for (int col = 0; col < columnNames.size(); col++) {
        byte pigType = HiveRCSchemaUtil.findPigDataType(columnTypes.get(col));
        fieldSchemaList.add(new FieldSchema(columnNames.get(col), pigType));
    }

    // tell the SerDe which columns and types to expect
    final Properties serdeProps = new Properties();
    serdeProps.setProperty(Constants.LIST_COLUMNS, HiveRCSchemaUtil.listToString(columnNames));
    serdeProps.setProperty(Constants.LIST_COLUMN_TYPES, HiveRCSchemaUtil.listToString(columnTypes));

    final Configuration hiveConf = new HiveConf(conf, SessionState.class);
    final ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(hiveConf, serdeProps);

    return (ColumnarStruct) serde.deserialize(buff);
}
 
Example #10
Source File: CountDistinctUpTo.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Validates that the input is a single bag field and reports an integer output field.
 *
 * @param input the input schema
 * @return a schema with one INTEGER field named "CountDistinctUpTo"
 * @throws RuntimeException if the input does not have exactly one field, that field is not a bag,
 *         or the field cannot be retrieved
 */
@Override
public Schema outputSchema(Schema input) {
	final int fieldCount = input.size();
	if (fieldCount != 1) {
		throw new RuntimeException("Expected a single field of type bag, but found " + fieldCount + " fields");
	}

	final FieldSchema onlyField;
	try {
		onlyField = input.getField(0);
	} catch (FrontendException e) {
		throw new RuntimeException(e);
	}

	if (onlyField.type != DataType.BAG) {
		throw new RuntimeException("Expected a bag but got: " + DataType.findTypeName(onlyField.type));
	}

	return new Schema(new FieldSchema("CountDistinctUpTo", DataType.INTEGER));
}
 
Example #11
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of Parquet types into a Pig schema; a repeated type is wrapped in a bag.
 *
 * @param parquetFields the Parquet types to convert
 * @return a Pig schema with one field per Parquet type
 * @throws SchemaConversionException if a type cannot be converted
 */
private Schema convertFields(List<Type> parquetFields) {
  List<FieldSchema> converted = new ArrayList<Schema.FieldSchema>();
  for (Type parquetType : parquetFields) {
    FieldSchema pigField;
    try {
      pigField = getFieldSchema(parquetType);
      if (parquetType.isRepetition(Repetition.REPEATED)) {
        // a repeated field becomes an unnamed bag holding the converted field
        Schema bagSchema = new Schema(Arrays.asList(pigField));
        pigField = new FieldSchema(null, bagSchema, DataType.BAG);
      }
    } catch (FrontendException fe) {
      throw new SchemaConversionException("can't convert "+ parquetType, fe);
    }
    converted.add(pigField);
  }
  return new Schema(converted);
}
 
Example #12
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Inserts a cast on every UDF argument whose type differs from the type in the target schema.
 *
 * @param func the UDF expression whose arguments are cast
 * @param fromSch schema of the arguments as supplied
 * @param toSch schema the arguments must match
 * @param toSchType kind of target schema; for VARARG the last target field is reused for
 *        every argument position beyond the end of {@code toSch}
 * @throws FrontendException declared by the signature; propagated from the calls made here
 */
private void insertCastsForUDF(UserFuncExpression func, Schema fromSch, Schema toSch, SchemaType toSchType)
throws FrontendException {
    final List<FieldSchema> actualFields = fromSch.getFields();
    final List<FieldSchema> targetFields = toSch.getFields();
    final List<LogicalExpression> args = func.getArguments();
    final boolean varArgs = (toSchType == SchemaType.VARARG);

    for (int pos = 0; pos < actualFields.size(); pos++) {
        // past the declared fields of a vararg schema: keep using the last declared field
        FieldSchema target;
        if (varArgs && pos >= targetFields.size()) {
            target = targetFields.get(targetFields.size() - 1);
        } else {
            target = targetFields.get(pos);
        }

        if (actualFields.get(pos).type != target.type) {
            insertCast(func, Util.translateFieldSchema(target), args.get(pos));
        }
    }
}
 
Example #13
Source File: Quantile.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as a tuple with one DOUBLE field per configured quantile.
 *
 * Fields are named "quantile_" followed by either the quantile's position (when
 * {@code ordinalOutputSchema} is set) or its value with "." replaced by "_".
 *
 * @param input the input schema (not consulted)
 * @return a schema holding a single unnamed TUPLE field
 * @throws RuntimeException if the tuple field schema cannot be constructed
 */
@Override
public Schema outputSchema(Schema input)
{
  Schema tupleSchema = new Schema();
  int position = 0;
  for (Double quantile : this.quantiles)
  {
    String suffix = ordinalOutputSchema
        ? String.valueOf(position)
        : quantile.toString().replace(".", "_");
    tupleSchema.add(new Schema.FieldSchema("quantile_" + suffix, DataType.DOUBLE));
    position++;
  }

  try {
    return new Schema(new FieldSchema(null, tupleSchema, DataType.TUPLE));
  } catch(FrontendException e) {
    throw new RuntimeException(e);
  }
}
 
Example #14
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Pig map field into a Parquet group type.
 *
 * @param alias name given to the resulting group
 * @param fieldSchema the Pig map field; its inner schema must contain exactly one field
 * @return an optional group containing one repeated group field (key, value)
 * @throws SchemaConversionException if the inner schema is missing, does not hold exactly
 *         one field, or its field cannot be retrieved
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
  final Schema valueSchema = fieldSchema.schema;
  final boolean hasSingleField = valueSchema != null && valueSchema.size() == 1;
  if (!hasSingleField) {
    throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
  }

  final FieldSchema valueField;
  try {
    valueField = valueSchema.getField(0);
  } catch (FrontendException fe) {
    throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
  }

  Type convertedValue = convertWithName(valueField, "value");
  return ConversionPatterns.stringKeyMapType(
      Repetition.OPTIONAL, alias, name(valueField.alias, "map"), convertedValue);
}
 
Example #15
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively gives every tuple field that has an empty (but non-null) schema a single
 * unnamed BYTEARRAY field. Fields without a schema are left untouched.
 *
 * @param fs the field schema to adjust in place
 */
private static void convertEmptyTupleToBytearrayTuple(
        FieldSchema fs) {
    final Schema inner = fs.schema;
    if (inner == null) {
        // nothing to fill in and nothing to descend into
        return;
    }

    if (fs.type == DataType.TUPLE && inner.size() == 0) {
        inner.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }

    for (FieldSchema child : inner.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}
 
Example #16
Source File: MarkovPairs.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as a bag of (elem1, elem2) pairs, where both elements reuse the
 * schema of the first field inside the input bag.
 *
 * Any exception raised while building the schema, including the one thrown below for a
 * non-bag input, is caught by the catch-all and turned into a {@code null} result.
 *
 * @param input the input schema; its first field is expected to be a bag
 * @return the output schema, or {@code null} if it could not be built
 */
@Override
public Schema outputSchema(Schema input)
{
  try {
    FieldSchema bagField = input.getField(0);
    if (bagField.type != DataType.BAG)
    {
      throw new RuntimeException(String.format("Expected input schema to be BAG, but instead found %s",
                                               DataType.findTypeName(bagField.type)));
    }

    // schema of the first field inside the bag, used for both halves of the pair
    Schema elementSchema = bagField.schema.getField(0).schema;

    Schema pairSchema = new Schema();
    pairSchema.add(new Schema.FieldSchema("elem1", elementSchema));
    pairSchema.add(new Schema.FieldSchema("elem2", elementSchema));

    String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    return new Schema(new Schema.FieldSchema(outputName, pairSchema, DataType.BAG));
  }
  catch (Exception e) {
    return null;
  }
}
 
Example #17
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the positions of all BYTEARRAY fields in a schema.
 *
 * @param func the UDF expression, used only when reporting an error
 * @param s input schema
 * @return indexes of the fields whose type is BYTEARRAY, in ascending order
 * @throws VisitorException if a field schema cannot be retrieved
 */
private List<Integer> getByteArrayPositions(UserFuncExpression func, Schema s)
        throws VisitorException {
    List<Integer> positions = new ArrayList<Integer>();
    for (int pos = 0; pos < s.size(); pos++) {
        FieldSchema field;
        try {
            field = s.getField(pos);
        } catch (FrontendException fee) {
            throw new TypeCheckerException(
                    func, "Unable to retrieve field schema.", 1043, PigException.INPUT, fee);
        }
        if (field.type == DataType.BYTEARRAY) {
            positions.add(pos);
        }
    }
    return positions;
}
 
Example #18
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the schema contains a BYTEARRAY field.
 *
 * Scanning stops with {@code false} as soon as a {@code null} field schema is encountered.
 *
 * @param func the UDF expression, used only when reporting an error
 * @param s input schema
 * @return true if a BYTEARRAY field is found before any null field, else false
 * @throws VisitorException if a field schema cannot be retrieved
 */
private boolean byteArrayFound(UserFuncExpression func, Schema s) throws VisitorException {
    for (int pos = 0; pos < s.size(); pos++) {
        FieldSchema field;
        try {
            field = s.getField(pos);
        } catch (FrontendException fee) {
            throw new TypeCheckerException(
                    func, "Unable to retrieve field schema.", 1043, PigException.INPUT, fee);
        }

        if (field == null) {
            return false;
        }
        if (field.type == DataType.BYTEARRAY) {
            return true;
        }
    }
    return false;
}
 
Example #19
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the fields on which predicate pushdown is offered.
 *
 * @param s not consulted by this implementation
 * @param job job whose configuration decides whether pushdown is enabled
 * @return aliases of the BOOLEAN, INTEGER, LONG, FLOAT, DOUBLE and CHARARRAY fields of the
 *         loader schema, or {@code null} when pushdown is disabled
 * @throws IOException declared by the signature
 */
@Override
public List<String> getPredicateFields(String s, Job job) throws IOException {
  final boolean pushdownEnabled = job.getConfiguration()
      .getBoolean(ENABLE_PREDICATE_FILTER_PUSHDOWN, DEFAULT_PREDICATE_PUSHDOWN_ENABLED);
  if (!pushdownEnabled) {
    return null;
  }

  List<String> pushable = new ArrayList<String>();
  for (FieldSchema field : schema.getFields()) {
    final byte type = field.type;
    // every other type (BYTEARRAY, TUPLE, MAP, BAG, DATETIME, BIGINTEGER, BIGDECIMAL, ...) is skipped
    final boolean supported = type == DataType.BOOLEAN
        || type == DataType.INTEGER
        || type == DataType.LONG
        || type == DataType.FLOAT
        || type == DataType.DOUBLE
        || type == DataType.CHARARRAY;
    if (supported) {
      pushable.add(field.alias);
    }
  }
  return pushable;
}
 
Example #20
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as an unnamed bag whose schema has three unnamed fields:
 * CHARARRAY, INTEGER and FLOAT.
 *
 * @param input the input schema (not consulted)
 * @return the output schema, or {@code null} if the bag field schema cannot be constructed
 */
@Override
public Schema outputSchema(Schema input) {
    Schema bagSchema = new Schema();
    bagSchema.add(new FieldSchema(null, DataType.CHARARRAY));
    bagSchema.add(new FieldSchema(null, DataType.INTEGER));
    bagSchema.add(new FieldSchema(null, DataType.FLOAT));

    try {
        return new Schema(new Schema.FieldSchema(null, bagSchema, DataType.BAG));
    } catch (FrontendException fee) {
        return null;
    }
}
 
Example #21
Source File: TestPigStreamingUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * A CHARARRAY input consisting only of the bytes "|_" deserializes to a tuple holding an empty string.
 */
@Test
public void testDeserialize__emptyString() throws IOException {
    final byte[] serialized = "|_".getBytes();
    final FieldSchema charField = new FieldSchema("", DataType.CHARARRAY);
    final PigStreamingUDF udf = new PigStreamingUDF(charField);

    final Object actual = udf.deserialize(serialized, 0, serialized.length);

    Assert.assertEquals(tf.newTuple(""), actual);
}
 
Example #22
Source File: TupleDiff.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a field of the schema nested inside {@code fieldSchema}.
 *
 * @param fieldSchema the enclosing field schema; may be {@code null}
 * @param fieldNum zero-based position of the wanted inner field
 * @return the inner field schema, or {@code null} when {@code fieldSchema} is {@code null},
 *         has no inner schema, or the inner schema has no field at {@code fieldNum}
 * @throws ExecException declared by the signature
 * @throws FrontendException propagated from the inner schema lookup
 */
private FieldSchema getFieldSchema(FieldSchema fieldSchema, int fieldNum) throws ExecException, FrontendException {
	if (fieldSchema == null) {
		return null;
	}

	Schema schema = fieldSchema.schema;
	if (schema == null) {
		// a field without an inner schema has no sub-field to return
		return null;
	}

	// compare against fieldNum directly; fieldNum + 1 would overflow for Integer.MAX_VALUE
	return fieldNum >= schema.size() ? null : schema.getField(fieldNum);
}
 
Example #23
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites a schema in place, changing every BOOLEAN field to INTEGER, including fields
 * of nested schemas. A {@code null} schema is ignored.
 *
 * @param schema the schema to rewrite; may be {@code null}
 */
private void convertToElephantBirdCompatibleSchema(Schema schema) {
  if (schema != null) {
    for (FieldSchema child : schema.getFields()) {
      if (DataType.BOOLEAN == child.type) {
        child.type = DataType.INTEGER;
      }
      // descend; the null check above ends the recursion at fields without a schema
      convertToElephantBirdCompatibleSchema(child.schema);
    }
  }
}
 
Example #24
Source File: TestConstructorArgs.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps argument schemas to implementing classes: a single CHARARRAY argument maps to this
 * class, a single INTEGER argument maps to {@code IntTest}.
 *
 * @return the function specs, CHARARRAY mapping first
 * @throws FrontendException declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    final Schema charArg = new Schema(new FieldSchema(null, DataType.CHARARRAY));
    final Schema intArg = new Schema(new FieldSchema(null, DataType.INTEGER));

    final List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), charArg));
    mappings.add(new FuncSpec(IntTest.class.getName(), intArg));
    return mappings;
}
 
Example #25
Source File: TestPigStreamingUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a serialized four-field tuple (CHARARRAY, INTEGER, LONG, LONG) whose first
 * field is expected to come back as null.
 */
@Test
public void testDeserialize__bug() throws Exception {
    final byte[] serialized = "|(_|-_|,_32|,_987654321098765432|,_987654321098765432|)_|_".getBytes();

    // inner tuple schema: one unnamed field per element type
    final byte[] elementTypes = {DataType.CHARARRAY, DataType.INTEGER, DataType.LONG, DataType.LONG};
    final List<FieldSchema> elements = new ArrayList<FieldSchema>();
    for (byte elementType : elementTypes) {
        elements.add(new FieldSchema("", elementType));
    }
    final FieldSchema tupleField = new FieldSchema("", new Schema(elements), DataType.TUPLE);
    final PigStreamingUDF udf = new PigStreamingUDF(tupleField);

    final Tuple expectedInner = tf.newTuple(4);
    expectedInner.set(0, null);
    expectedInner.set(1, 32);
    expectedInner.set(2, 987654321098765432L);
    expectedInner.set(3, 987654321098765432L);

    final Object actual = udf.deserialize(serialized, 0, serialized.length);
    Assert.assertEquals(tf.newTuple(expectedInner), actual);
}
 
Example #26
Source File: TestPigStreamingUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * A BOOLEAN input "true" followed by "|_" deserializes to a tuple holding {@code Boolean.TRUE}.
 */
@Test
public void testDeserialize__boolean() throws IOException {
    final byte[] serialized = "true|_".getBytes();
    final FieldSchema booleanField = new FieldSchema("", DataType.BOOLEAN);
    final PigStreamingUDF udf = new PigStreamingUDF(booleanField);

    final Object actual = udf.deserialize(serialized, 0, serialized.length);

    Assert.assertEquals(tf.newTuple(Boolean.TRUE), actual);
}
 
Example #27
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the field is NOT a bag whose first inner field is a tuple without a schema.
 *
 * @param fieldSch the field schema to inspect
 * @return false only when {@code fieldSch} is a BAG with a non-null schema whose first
 *         field is non-null, of type TUPLE, and has a null schema; true otherwise
 * @throws FrontendException propagated from retrieving the first inner field
 */
private static boolean isNotBagWithEmptyTuple(FieldSchema fieldSch)
throws FrontendException {
    if (fieldSch.type != DataType.BAG || fieldSch.schema == null) {
        return true;
    }

    final FieldSchema first = fieldSch.schema.getField(0);
    if (first == null) {
        return true;
    }

    final boolean emptyTuple = first.type == DataType.TUPLE && first.schema == null;
    return !emptyTuple;
}
 
Example #28
Source File: LSHFunc.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the input schema up front so that an inconsistent input fails fast.
 *
 * @param input input schema; its first field must be a valid vector of the configured dimension
 * @throws FrontendException if the first field is not a valid vector, or it cannot be retrieved
 */
private void validateInputSchema(Schema input) throws FrontendException
{
  final FieldSchema vectorSchema = input.getField(0);
  final boolean validVector = DataTypeUtil.isValidVector(vectorSchema, getDimension());
  if (validVector)
  {
    return;
  }
  throw new FrontendException("Invalid vector element: Expected either a tuple or a bag, but found " + vectorSchema);
}
 
Example #29
Source File: MetricUDF.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the input schema up front so that an inconsistent input fails fast.
 *
 * Three fields are checked in order: field 0 must be a valid vector, field 1 must be a
 * DOUBLE, INTEGER or LONG, and field 2 must be a bag whose tuples start with a valid vector.
 *
 * @param input input schema
 * @throws FrontendException if any of the checks fails, or a field cannot be retrieved
 */
private void validateInputSchema(Schema input) throws FrontendException
{
  // field 0: the query vector
  final FieldSchema vectorSchema = input.getField(0);
  if (!DataTypeUtil.isValidVector(vectorSchema, dim))
  {
    throw new FrontendException("Invalid vector element: Expected either a tuple or a bag, but found " + vectorSchema);
  }

  // field 1: the distance, which must be numeric
  final FieldSchema distanceSchema = input.getField(1);
  final byte distanceType = distanceSchema.type;
  final boolean numericDistance = distanceType == DataType.DOUBLE
      || distanceType == DataType.INTEGER
      || distanceType == DataType.LONG;
  if (!numericDistance)
  {
    throw new FrontendException("Invalid distance element: Expected a number, but found " + distanceSchema);
  }

  // field 2: a bag of tuples, each starting with a vector
  final FieldSchema pointsSchema = input.getField(2);
  if (pointsSchema.type != DataType.BAG)
  {
    throw new FrontendException("Invalid points element: Expected a bag, but found " + pointsSchema);
  }
  final FieldSchema vectorInTuple = pointsSchema.schema.getField(0).schema.getField(0);
  if (!DataTypeUtil.isValidVector(vectorInTuple, dim))
  {
    throw new FrontendException("Invalid points element: Expected a bag of vectors, but found " + vectorInTuple.schema);
  }
}
 
Example #30
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a filter predicate comparing a column against a constant, choosing the column
 * representation from the Pig type the loader schema records for that column.
 *
 * @param op the comparison operation
 * @param col the column being filtered; its name is looked up in {@code schema}
 * @param value the constant to compare against
 * @return the filter predicate for the comparison
 * @throws RuntimeException if the column's type is not one of BOOLEAN, INTEGER, LONG, FLOAT,
 *         DOUBLE or CHARARRAY, if a boolean column is used with an operation other than
 *         OP_EQ / OP_NE, or if the schema lookup fails with a FrontendException
 */
private FilterPredicate buildFilter(OpType op, Column col, Const value) {
  String name = col.getName();
  try {
    FieldSchema f = schema.getField(name);
    switch (f.type) {
      case DataType.BOOLEAN:
        Operators.BooleanColumn boolCol = booleanColumn(name);
        // boolean columns are handled inline: only equality and inequality are accepted
        switch(op) {
          case OP_EQ: return eq(boolCol, getValue(value, boolCol.getColumnType()));
          case OP_NE: return notEq(boolCol, getValue(value, boolCol.getColumnType()));
          default: throw new RuntimeException(
              "Operation " + op + " not supported for boolean column: " + name);
        }
      // every remaining supported type delegates to the op(...) helper with a typed column
      case DataType.INTEGER:
        Operators.IntColumn intCol = intColumn(name);
        return op(op, intCol, value);
      case DataType.LONG:
        Operators.LongColumn longCol = longColumn(name);
        return op(op, longCol, value);
      case DataType.FLOAT:
        Operators.FloatColumn floatCol = floatColumn(name);
        return op(op, floatCol, value);
      case DataType.DOUBLE:
        Operators.DoubleColumn doubleCol = doubleColumn(name);
        return op(op, doubleCol, value);
      case DataType.CHARARRAY:
        // CHARARRAY columns are filtered through a binary column
        Operators.BinaryColumn binaryCol = binaryColumn(name);
        return op(op, binaryCol, value);
      default:
        throw new RuntimeException("Unsupported type " + f.type + " for field: " + name);
    }
  } catch (FrontendException e) {
    // the schema lookup failed; surface it as an unchecked error with the cause attached
    throw new RuntimeException("Error processing pushdown for column:" + col, e);
  }
}