Java Code Examples for org.apache.hadoop.hive.ql.exec.Utilities

The following examples show how to use org.apache.hadoop.hive.ql.exec.Utilities. The examples are extracted from open source projects; where known, the source project, source file, and license are listed above each example.
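
As a quick orientation before the per-project examples, here is a minimal, self-contained sketch of the pattern most of them share: serializing a filter expression into the job configuration and reading it back. It assumes a Hive release (roughly the 0.13/0.14 line several of these projects target) where serializeExpression and deserializeExpression still live on Utilities rather than SerializationUtilities; the class name FilterExprRoundTrip is illustrative only.

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.JobConf;

public class FilterExprRoundTrip {
  public static void main(String[] args) {
    // Build a simple "rid = 'hi'" predicate, the same way the tests below do.
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hi");
    ExprNodeGenericFuncDesc predicate = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), Arrays.asList(column, constant));

    // A storage handler publishes the predicate through the job conf...
    JobConf conf = new JobConf();
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, Utilities.serializeExpression(predicate));

    // ...and a reader later pulls it back out.
    ExprNodeDesc restored = Utilities.deserializeExpression(conf.get(TableScanDesc.FILTER_EXPR_CONF_STR));
    System.out.println(restored.getExprString());
  }
}
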
Example 1
public HiveVectorizedReaderSetting( final FileSplit split , final JobConf job , final HiveReaderSetting hiveReaderConfig ) throws IOException{
  this.hiveReaderConfig = hiveReaderConfig;

  rbCtx = Utilities.getVectorizedRowBatchCtx( job );
  partitionValues = new Object[rbCtx.getPartitionColumnCount()];
  if( 0 < partitionValues.length ){
    rbCtx.getPartitionValues( rbCtx, job, split, partitionValues );
  }

  TypeInfo[] typeInfos = rbCtx.getRowColumnTypeInfos();
  columnNames = rbCtx.getRowColumnNames();
  needColumnIds = createNeedColumnId( ColumnProjectionUtils.getReadColumnIDs( job ) );

  projectionColumn = new boolean[columnNames.length];
  assignors = new IColumnVectorAssignor[columnNames.length];
  for( int id : needColumnIds ){
    projectionColumn[id] = true;
    assignors[id] = ColumnVectorAssignorFactory.create( typeInfos[id] );
  }
}
 
Example 2
Source Project: dremio-oss   Source File: HiveUtilities.java    License: Apache License 2.0
/**
 * Helper method that sets the config needed to read transactional (ACID) tables. Prerequisite: <i>job</i>
 * must already contain the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
  if (!AcidUtils.isTablePropertyTransactional(job)) {
    return;
  }

  AcidUtils.setTransactionalTableScan(job, true);

  // Add ACID related properties
  if (Utilities.isSchemaEvolutionEnabled(job, true) &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
    // If the schema evolution columns and types are already set, then there is no additional conf to set.
    return;
  }

  // Get them from table properties and set them as schema evolution properties
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));

}
 
Example 3
Source Project: dremio-oss   Source File: HiveUtilities.java    License: Apache License 2.0
/**
 * Helper method that sets the config needed to read transactional (ACID) tables. Prerequisite: <i>job</i>
 * must already contain the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
  if (!AcidUtils.isTablePropertyTransactional(job)) {
    return;
  }

  AcidUtils.setAcidOperationalProperties(job, true, null);

  // Add ACID related properties
  if (Utilities.isSchemaEvolutionEnabled(job, true) &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
    // If the schema evolution columns and types are already set, then there is no additional conf to set.
    return;
  }

  // Get them from table properties and set them as schema evolution properties
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));

}
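
Examples 2 and 3 differ only in how they flag the ACID scan (setTransactionalTableScan versus the newer setAcidOperationalProperties), and both rely on the caller having already copied the table properties into the JobConf. A hypothetical companion method in the same HiveUtilities class could satisfy that prerequisite as sketched below; prepareAcidJobConf and its PartitionDesc argument are illustrative rather than part of the Dremio source, and the sketch assumes the older Utilities.copyTableJobPropertiesToConf overload used in Examples 14 and 17, which declares no checked exception.

// Hypothetical companion method (illustrative name): seed the JobConf with the
// table/serde properties so addACIDPropertiesIfNeeded can inspect them.
public static void prepareAcidJobConf(final JobConf job, final PartitionDesc part) {
  // Copies the columns, column types, transactional flag, etc. into the JobConf,
  // which is exactly what AcidUtils.isTablePropertyTransactional(job) reads.
  Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), job);
  addACIDPropertiesIfNeeded(job);
}
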
 
Example 4
Source Project: hudi   Source File: HoodieCombineHiveInputFormat.java    License: Apache License 2.0
/**
 * MOD - Just added this for visibility.
 */
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    // on tez we avoid duplicating the file info in FileInputFormat.
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
      try {
        List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
        dirs = paths.toArray(new Path[paths.size()]);
      } catch (Exception e) {
        throw new IOException("Could not create input files", e);
      }
    } else {
      throw new IOException("No input paths specified in job");
    }
  }
  return dirs;
}
 
Example 5
Source Project: hudi   Source File: HoodieCombineHiveInputFormat.java    License: Apache License 2.0
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;
  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = getPartitionFromPath(this.pathToPartitionInfo, ipaths[0],
          IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
 
Example 6
Source Project: hudi   Source File: HoodieCombineHiveInputFormat.java    License: Apache License 2.0
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = getPartitionFromPath(pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());

    // record the input format class name if this is the first
    // time we see this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  Text.writeString(out, inputFormatClassName);
  if (HoodieParquetRealtimeInputFormat.class.getName().equals(inputFormatClassName)) {
    // Write Shim Class Name
    Text.writeString(out, inputSplitShim.getClass().getName());
  }
  inputSplitShim.write(out);
}
 
Example 7
@Test
public void getRowIDSearchCondition() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hi");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf);
        assertEquals(sConditions.size(), 1);
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 8
@Test()
public void rangeEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 9
@Test
public void rangeGreaterThanOrEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertFalse(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 10
@Test
public void pushdownTuple() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf);
        assertEquals(sConditions.size(), 1);
        AccumuloPredicateHandler.PushdownTuple tuple = new AccumuloPredicateHandler.PushdownTuple(sConditions.get(0));
        byte [] expectedVal = new byte[4];
        ByteBuffer.wrap(expectedVal).putInt(5);
        assertArrayEquals(tuple.getConstVal(), expectedVal);
        assertEquals(tuple.getcOpt().getClass(), Equal.class);
        assertEquals(tuple.getpCompare().getClass(), IntCompare.class);
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example 11
Source Project: metacat   Source File: CatalogThriftHiveMetastore.java    License: Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public PartitionsByExprResult get_partitions_by_expr(final PartitionsByExprRequest req) throws TException {
    return requestWrapper("get_partitions_by_expr", new Object[]{req},
        () -> {
            try {
                String filter = null;
                if (req.getExpr() != null) {
                    filter = Utilities.deserializeExpressionFromKryo(req.getExpr()).getExprString();
                    if (filter == null) {
                        throw new MetaException("Failed to deserialize expression - ExprNodeDesc not present");
                    }
                }
                //TODO: We need to handle the case for 'hasUnknownPartitions'
                return new PartitionsByExprResult(
                    getPartitionsByFilter(req.getDbName(), req.getTblName(), filter, req.getMaxParts()),
                    false);
            } catch (Exception e) {
                //
                // If there is an exception with filtering, fallback to getting all partition names and then
                // apply the filter.
                //
                final List<String> partitionNames = Lists.newArrayList(
                    get_partition_names(req.getDbName(), req.getTblName(), (short) -1));
                final Table table = get_table(req.getDbName(), req.getTblName());
                final List<String> columnNames = new ArrayList<>();
                final List<PrimitiveTypeInfo> typeInfos = new ArrayList<>();
                for (FieldSchema fs : table.getPartitionKeys()) {
                    columnNames.add(fs.getName());
                    typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
                }
                final boolean hasUnknownPartitions = new PartitionExpressionForMetastore().filterPartitionsByExpr(
                    columnNames, typeInfos, req.getExpr(), req.getDefaultPartitionName(), partitionNames);

                return new PartitionsByExprResult(get_partitions_by_names(
                    req.getDbName(), req.getTblName(), partitionNames), hasUnknownPartitions);
            }
        });
}
 
Example 12
Source Project: parquet-mr   Source File: Hive012Binding.java    License: Apache License 2.0
/**
 * Initialize the mapWork variable in order to get all the partitions and start updating the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mapWork == null && plan != null && plan.length() > 0) {
    mapWork = Utilities.getMapWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mapWork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
 
Example 13
Source Project: parquet-mr   Source File: Hive012Binding.java    License: Apache License 2.0
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {

    final TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
      LOG.debug("Not pushing filters because TableScanDesc is null");
      return;
    }

    // construct column name list for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);

    // push down filters
    final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
    if (filterExpr == null) {
      LOG.debug("Not pushing filters because FilterExpr is null");
      return;
    }

    final String filterText = filterExpr.getExprString();
    final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
    jobConf.set(
            TableScanDesc.FILTER_TEXT_CONF_STR,
            filterText);
    jobConf.set(
            TableScanDesc.FILTER_EXPR_CONF_STR,
            filterExprSerialized);
  }
 
Example 14
Source Project: parquet-mr   Source File: Hive012Binding.java    License: Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
    throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }

  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
 
Example 15
Source Project: parquet-mr   Source File: Hive010Binding.java    License: Apache License 2.0
/**
 * Initialize the mrwork variable in order to get all the partitions and start updating the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mrwork == null && plan != null && plan.length() > 0) {
    mrwork = Utilities.getMapRedWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mrwork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
 
Example 16
Source Project: parquet-mr   Source File: Hive010Binding.java    License: Apache License 2.0
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {

    final TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
      LOG.debug("Not pushing filters because TableScanDesc is null");
      return;
    }

    // construct column name list for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);

    // push down filters
    final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
    if (filterExpr == null) {
      LOG.debug("Not pushing filters because FilterExpr is null");
      return;
    }

    final String filterText = filterExpr.getExprString();
    final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
    jobConf.set(
            TableScanDesc.FILTER_TEXT_CONF_STR,
            filterText);
    jobConf.set(
            TableScanDesc.FILTER_EXPR_CONF_STR,
            filterExprSerialized);
  }
 
Example 17
Source Project: parquet-mr   Source File: Hive010Binding.java    License: Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
    throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }

  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
 
Example 18
Source Project: spork   Source File: KryoSerializer.java    License: Apache License 2.0
public static byte[] serialize(Object object) {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    Output output = new Output(stream);

    Utilities.runtimeSerializationKryo.get().writeObject(output, object);

    output.close(); // close() also calls flush()
    return stream.toByteArray();
}
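
Serialization is only half of the story; a matching helper to read the bytes back would look roughly like the sketch below. It reuses the same thread-local Kryo instance exposed by Utilities.runtimeSerializationKryo, and the method name deserialize is assumed here rather than taken from the spork source.

public static <T> T deserialize(byte[] buffer, Class<T> clazz) {
    Input input = new Input(buffer); // Kryo Input over the serialized bytes
    T object = Utilities.runtimeSerializationKryo.get().readObject(input, clazz);
    input.close();
    return object;
}
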
 
Example 19
/**
 *
 * @param conf JobConf
 * @return list of IndexSearchConditions from the filter expression.
 */
public List<IndexSearchCondition> getSearchConditions(JobConf conf) {
    List<IndexSearchCondition> sConditions = Lists.newArrayList();
    String filteredExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if(filteredExprSerialized == null)
        return sConditions;
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filteredExprSerialized, conf);
    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
    if(residual != null)
        throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
    return sConditions;
}
 
Example 20
@Test()
public void rangeGreaterThan() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertFalse(range.contains(new Key(new Text("aaa"))));
        assertFalse(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 21
@Test
public void rangeLessThan() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertFalse(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertTrue(range.afterEndKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 22
@Test
public void rangeLessThanOrEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertFalse(range.afterEndKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 23
@Test
public void multiRange() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 2);
        Iterator<Range> itr = ranges.iterator();
        Range range1 = itr.next();
        Range range2 = itr.next();
        assertNull(range1.clip(range2, true));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example 24
@Test
public void iteratorIgnoreRowIDFields() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf);
        assertEquals(iterators.size(), 0);
    } catch (SerDeException e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example 25
@Test
public void ignoreIteratorPushdown() {
    setup();
    conf.set(serdeConstants.LIST_COLUMNS, "field1,field2,rid");
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
    conf.set(AccumuloSerde.COLUMN_MAPPINGS, "cf|f1,cf|f2,rowID");
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    conf.setBoolean(AccumuloSerde.NO_ITERATOR_PUSHDOWN, true);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf);
        assertEquals(iterators.size(), 0);
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example 26
Source Project: incubator-sentry   Source File: HiveAuthzBindingHook.java    License: Apache License 2.0
/**
 * Set the Serde URI privileges. If the URI privileges are not set, serdeURI will be null and
 * the URI authorization checks will be skipped.
 */
private void setSerdeURI(String serdeClassName) throws SemanticException {
  if (!serdeURIPrivilegesEnabled) {
    return;
  }

  // A whitelisted Serde jar can be used by any user. Whitelist checking is
  // done by comparing the Java package name. The assumption is that the cluster
  // admin will ensure there is no Java namespace collision,
  // e.g. org.apache.hadoop.hive.serde2 is used by Hive, and the cluster admin should
  // ensure no custom Serde class is introduced under the same namespace.
  if (!hasPrefixMatch(serdeWhiteList, serdeClassName)) {
    try {
      CodeSource serdeSrc = Class.forName(serdeClassName, true, Utilities.getSessionSpecifiedClassLoader()).getProtectionDomain().getCodeSource();
      if (serdeSrc == null) {
        throw new SemanticException("Could not resolve the jar for Serde class " + serdeClassName);
      }

      String serdeJar = serdeSrc.getLocation().getPath();
      if (serdeJar == null || serdeJar.isEmpty()) {
        throw new SemanticException("Could not find the jar for Serde class " + serdeClassName + "to validate privileges");
      }

      serdeURI = parseURI(serdeSrc.getLocation().toString(), true);
    } catch (ClassNotFoundException e) {
      throw new SemanticException("Error retrieving Serde class:" + e.getMessage(), e);
    }
  }
}
 
Example 27
@Override
public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) {
  return Utilities.deserializeExpressionFromKryo(exprBytes);
}
 
Example 28
@Override
public byte[] getSerializeExpression(ExprNodeGenericFuncDesc expr) {
  return Utilities.serializeExpressionToKryo(expr);
}
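
Examples 27 and 28 are thin shims over the Kryo-based expression codec in Utilities. A short round trip, assuming a Hive version where these methods still live on Utilities rather than SerializationUtilities, shows that the two calls invert each other; the method name and the "field1 = 5" predicate below are made up for illustration.

// Illustrative round trip through the Kryo expression codec.
public void kryoExpressionRoundTrip() {
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), Lists.newArrayList(column, constant));

  byte[] bytes = Utilities.serializeExpressionToKryo(expr);                          // Example 28
  ExprNodeGenericFuncDesc restored = Utilities.deserializeExpressionFromKryo(bytes); // Example 27

  // The restored tree renders to the same expression string as the original.
  assert expr.getExprString().equals(restored.getExprString());
}
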
 
Example 29
Source Project: presto   Source File: HiveUtil.java    License: Apache License 2.0
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns)
{
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = columns.stream()
            .filter(column -> column.getColumnType() == REGULAR)
            .collect(toImmutableList());

    // Projected columns are not supported here
    readColumns.forEach(readColumn -> checkArgument(readColumn.isBaseColumn(), "column %s is not a base column", readColumn.getName()));

    List<Integer> readHiveColumnIndexes = readColumns.stream()
            .map(HiveColumnHandle::getBaseHiveColumnIndex)
            .collect(toImmutableList());

    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    configuration = copy(configuration);
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream()
            .filter(name -> name.startsWith("serialization."))
            .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    configureCompressionCodecs(jobConf);

    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);

        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }

        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }

        return recordReader;
    }
    catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }

        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s",
                path,
                start,
                length,
                getInputFormatName(schema),
                firstNonNull(e.getMessage(), e.getClass().getName())),
                e);
    }
}
 
Example 30
public HiveReaderSetting( final FileSplit split, final JobConf job ){
  config = new Configuration();

  disableSkipBlock = job.getBoolean( "mds.disable.block.skip" , false );
  disableFilterPushdown = job.getBoolean( "mds.disable.filter.pushdown" , false );

  Set<String> pathNameSet = createPathSet( split.getPath() );
  List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
  String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
  if( filterExprSerialized != null ){
    filterExprs.add( Utilities.deserializeExpression(filterExprSerialized) );
  }

  MapWork mapWork;
  try{
    mapWork = Utilities.getMapWork(job);
  }catch( Exception e ){
    mapWork = null;
  }

  if( mapWork == null ){
    node = createExpressionNode( filterExprs );
    isVectorModeFlag = false;
    return;
  }

  node = createExpressionNode( filterExprs );

  for( Map.Entry<String,PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet() ){
    if( ! pathNameSet.contains( pathsAndParts.getKey() ) ){
      continue;
    }
    Properties props = pathsAndParts.getValue().getTableDesc().getProperties();
    if( props.containsKey( "mds.expand" ) ){
      config.set( "spread.reader.expand.column" , props.getProperty( "mds.expand" ) );
    }
    if( props.containsKey( "mds.flatten" ) ){
      config.set( "spread.reader.flatten.column" , props.getProperty( "mds.flatten" ) );
    }
  }

  config.set( "spread.reader.read.column.names" , createReadColumnNames( job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );

  // Next Hive version:
  // Utilities.getUseVectorizedInputFileFormat(job)
  isVectorModeFlag = Utilities.isVectorMode( job );
}