org.apache.pig.LoadPushDown.RequiredFieldList Java Examples

The following examples show how to use org.apache.pig.LoadPushDown.RequiredFieldList. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TupleReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the pushed-down projection stored in the job configuration under
 * {@code PARQUET_PIG_REQUIRED_FIELDS}.
 *
 * @param configuration configuration for the current job
 * @return List of required fields from pushProjection, or {@code null} when
 *         the configuration holds no value for that key
 * @throws RuntimeException if the stored value cannot be deserialized; the
 *         underlying {@link IOException} is attached as the cause
 */
static RequiredFieldList getRequiredFields(Configuration configuration) {
  String requiredFieldString = configuration.get(PARQUET_PIG_REQUIRED_FIELDS);

  if (requiredFieldString == null) {
    return null;
  }

  try {
    return (RequiredFieldList) ObjectSerializer.deserialize(requiredFieldString);
  } catch (IOException e) {
    // Chain the original exception so the real reason for the failure is not lost.
    throw new RuntimeException("Failed to deserialize pushProjection", e);
  }
}
 
Example #2
Source File: TupleReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the read context for a file. When no Pig schema is found in the
 * configuration the file schema is used unchanged; otherwise the file schema
 * is filtered by the Pig schema and the required fields.
 *
 * @param initContext gives access to the job configuration and the file schema
 * @return the read context carrying the schema to read with
 */
@Override
public ReadContext init(InitContext initContext) {
  Schema requestedPigSchema = getPigSchema(initContext.getConfiguration());
  RequiredFieldList projection = getRequiredFields(initContext.getConfiguration());
  boolean useColumnIndex =
      initContext.getConfiguration().getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);

  // Nothing was requested by Pig: read with the schema stored in the file.
  if (requestedPigSchema == null) {
    return new ReadContext(initContext.getFileSchema());
  }

  // project the file schema according to the requested Pig schema
  PigSchemaConverter converter = new PigSchemaConverter(useColumnIndex);
  MessageType projectedSchema =
      converter.filter(initContext.getFileSchema(), requestedPigSchema, projection);
  return new ReadContext(projectedSchema);
}
 
Example #3
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Selects fields of {@code schemaToFilter} by position.
 *
 * With no required-field list, every field of {@code pigSchema} is paired
 * with its position in that schema. Otherwise each required field is paired
 * with the index it carries. Pairs whose index is not below the field count
 * of {@code schemaToFilter} are skipped.
 *
 * @param schemaToFilter the group whose fields are selected
 * @param pigSchema the Pig schema used to look fields up by alias
 * @param requiredFieldsList the pushed-down projection, may be {@code null}
 * @return the filtered fields, in the order the pairs were collected
 * @throws RuntimeException wrapping any {@code FrontendException} raised
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
  List<Type> selected = new ArrayList<Type>();
  List<Pair<FieldSchema,Integer>> candidates = new ArrayList<Pair<FieldSchema,Integer>>();

  try {
    // Phase 1: collect (field schema, position) pairs.
    if (requiredFieldsList != null) {
      for (RequiredField required : requiredFieldsList.getFields()) {
        FieldSchema byAlias = pigSchema.getField(required.getAlias());
        candidates.add(new Pair<FieldSchema, Integer>(byAlias, required.getIndex()));
      }
    } else {
      int position = 0;
      for (FieldSchema field : pigSchema.getFields()) {
        candidates.add(new Pair<FieldSchema, Integer>(field, position));
        position++;
      }
    }

    // Phase 2: keep only the positions that exist in the schema being filtered.
    for (Pair<FieldSchema, Integer> candidate : candidates) {
      FieldSchema fieldSchema = pigSchema.getField(candidate.first.alias);
      if (candidate.second >= schemaToFilter.getFieldCount()) {
        continue;
      }
      selected.add(filter(schemaToFilter.getFields().get(candidate.second), fieldSchema));
    }
  } catch (FrontendException e) {
      throw new RuntimeException("Failed to filter requested fields", e);
  }
  return selected;
}
 
Example #4
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Selects fields of {@code schemaToFilter} by name.
 *
 * Each field of the requested Pig schema is resolved to a name through
 * {@code name(alias, "field_" + position)}. Fields that
 * {@code schemaToFilter} does not contain under that name are skipped.
 *
 * @param schemaToFilter the group whose fields are selected
 * @param requestedPigSchema the Pig schema listing the wanted fields
 * @param requiredFieldsList not used by this implementation
 * @return the filtered fields, in the order of the requested Pig schema
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<FieldSchema> requestedFields = requestedPigSchema.getFields();
  List<Type> kept = new ArrayList<Type>();
  int position = 0;
  for (FieldSchema requested : requestedFields) {
    String columnName = name(requested.alias, "field_" + position);
    position++;
    if (!schemaToFilter.containsField(columnName)) {
      continue;
    }
    kept.add(filter(schemaToFilter.getType(columnName), requested));
  }
  return kept;
}
 
Example #5
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Turns a serialized required-field list back into an object.
 *
 * @param requiredFieldString the serialized form, may be {@code null}
 * @return the deserialized list, or {@code null} when the input is {@code null}
 * @throws RuntimeException wrapping the {@link IOException} raised when
 *         deserialization fails
 */
public static RequiredFieldList deserializeRequiredFieldList(String requiredFieldString) {
  if (requiredFieldString != null) {
    try {
      Object decoded = ObjectSerializer.deserialize(requiredFieldString);
      return (RequiredFieldList) decoded;
    } catch (IOException ioe) {
      throw new RuntimeException("Failed to deserialize pushProjection", ioe);
    }
  }
  return null;
}
 
Example #6
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a required-field list to a string through {@code ObjectSerializer}.
 *
 * @param requiredFieldList the list to serialize
 * @return the serialized form
 * @throws RuntimeException wrapping the {@link IOException} raised when
 *         serialization fails
 */
static String serializeRequiredFieldList(RequiredFieldList requiredFieldList) {
  final String encoded;
  try {
    encoded = ObjectSerializer.serialize(requiredFieldList);
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to searlize required fields.", ioe);
  }
  return encoded;
}
 
Example #7
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Projects a Parquet schema onto a Pig schema.
 *
 * The field selection is delegated to {@code columnAccess}; the result is
 * wrapped in a new message type that keeps the name of the input schema.
 *
 * @param schemaToFilter the Parquet schema to filter
 * @param requestedPigSchema the Pig schema to filter it with
 * @param requiredFieldList projected required fields
 * @return the resulting filtered schema
 * @throws RuntimeException wrapping any runtime failure, with both schemas
 *         included in the message
 */
public MessageType filter(MessageType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldList) {
  try {
    if (LOG.isDebugEnabled()) {
      LOG.debug("filtering schema:\n" + schemaToFilter + "\nwith requested pig schema:\n " + requestedPigSchema);
    }

    List<Type> keptFields =
        columnAccess.filterTupleSchema(schemaToFilter, requestedPigSchema, requiredFieldList);

    if (LOG.isDebugEnabled()) {
      LOG.debug("schema:\n" + schemaToFilter + "\nfiltered to:\n" + keptFields);
    }

    return new MessageType(schemaToFilter.getName(), keptFields);
  } catch (RuntimeException failure) {
    throw new RuntimeException("can't filter " + schemaToFilter + " with " + requestedPigSchema, failure);
  }
}
 
Example #8
Source File: PigUtils.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Renders a required-field list as a comma-separated projection string.
 *
 * Each required field is passed to {@code addField} together with the alias
 * settings built from {@code props}; the collected names are then joined
 * with ",".
 *
 * @param list the required fields to render
 * @param props properties from which the field alias settings are built
 * @return the collected field names joined by ","
 */
static String asProjection(RequiredFieldList list, Properties props) {
    FieldAlias fieldAlias = alias(new PropertiesSettings(props));
    List<String> projected = new ArrayList<String>();

    for (RequiredField required : list.getFields()) {
        addField(required, projected, fieldAlias, "");
    }

    String joined = StringUtils.concatenate(projected, ",");
    return joined;
}
 
Example #9
Source File: AvroStorageSchemaConversionUtilities.java    From spork with Apache License 2.0 2 votes vote down vote up
/**
 * Takes an Avro Schema and a Pig RequiredFieldList and returns a new schema
 * with only the required fields, or null if the function can't extract only
 * those fields. Useful for push down projections.
 *
 * This overload only unwraps the field list with {@code rfl.getFields()}
 * and delegates to the overload that accepts those fields directly.
 *
 * @param oldSchema The avro schema from which to extract the schema
 * @param rfl the Pig required field list; it is dereferenced without a
 *        null check
 * @return the new schema, or null
 */
public static Schema newSchemaFromRequiredFieldList(
    final Schema oldSchema, final RequiredFieldList rfl) {
  return newSchemaFromRequiredFieldList(oldSchema, rfl.getFields());
}
 
Example #10
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 votes vote down vote up
/**
 * Produces the list of types to keep from a group schema for a given Pig
 * schema and pushed-down projection.
 *
 * NOTE(review): this contract is inferred from the signature alone; confirm
 * the details (null handling of {@code requiredFieldsList}, ordering of the
 * result) against the implementing classes.
 *
 * @param schemaToFilter the group schema whose fields are filtered
 * @param pigSchema the Pig schema to filter with
 * @param requiredFieldsList the required fields of the projection
 * @return the types retained from {@code schemaToFilter}
 */
List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList);