org.apache.hadoop.hive.ql.exec.Utilities Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.exec.Utilities. Each example is taken from an open source project; the source file, project, and license are noted above the code.
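Many of the examples below revolve around the same pattern: build an ExprNodeDesc predicate, serialize it into the JobConf with Utilities.serializeExpression for predicate pushdown, and read it back with Utilities.deserializeExpression. The sketch below shows that round trip in isolation. It is only a sketch: it mirrors the calls used in the examples on this page, but exact method signatures vary by Hive release (newer releases moved these helpers to SerializationUtilities), and the class name FilterExpressionRoundTrip is made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.JobConf;

public class FilterExpressionRoundTrip {
  public static void main(String[] args) {
    // Build the predicate "rid = 'aaa'" as an expression tree.
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Arrays.asList(column, constant);
    ExprNodeDesc predicate =
        new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), children);

    // Serialize the expression into the job configuration, as a storage handler
    // does when pushing filters down to the input format.
    JobConf conf = new JobConf();
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, Utilities.serializeExpression(predicate));

    // Read it back on the other side. This two-argument form matches the examples
    // below; newer Hive releases take only the serialized string.
    ExprNodeDesc restored = Utilities.deserializeExpression(conf.get(TableScanDesc.FILTER_EXPR_CONF_STR), conf);
    System.out.println(restored.getExprString());
  }
}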
Example #1
Source File: HiveVectorizedReaderSetting.java    From multiple-dimension-spread with Apache License 2.0
public HiveVectorizedReaderSetting( final FileSplit split , final JobConf job , final HiveReaderSetting hiveReaderConfig ) throws IOException{
  this.hiveReaderConfig = hiveReaderConfig;

  rbCtx = Utilities.getVectorizedRowBatchCtx( job );
  partitionValues = new Object[rbCtx.getPartitionColumnCount()];
  if( 0 < partitionValues.length ){
    rbCtx.getPartitionValues( rbCtx, job, split, partitionValues );
  }

  TypeInfo[] typeInfos = rbCtx.getRowColumnTypeInfos();
  columnNames = rbCtx.getRowColumnNames();
  needColumnIds = createNeedColumnId( ColumnProjectionUtils.getReadColumnIDs( job ) );

  projectionColumn = new boolean[columnNames.length];
  assignors = new IColumnVectorAssignor[columnNames.length];
  for( int id : needColumnIds ){
    projectionColumn[id] = true;
    assignors[id] = ColumnVectorAssignorFactory.create( typeInfos[id] );
  }
}
 
Example #2
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0
/**
 * Helper method that configures the job to read transactional (ACID) tables. Prerequisite:
 * <i>job</i> already contains the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
  if (!AcidUtils.isTablePropertyTransactional(job)) {
    return;
  }

  AcidUtils.setTransactionalTableScan(job, true);

  // Add ACID related properties
  if (Utilities.isSchemaEvolutionEnabled(job, true) &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
    // If the schema evolution columns and types are already set, then there is no additional conf to set.
    return;
  }

  // Get them from table properties and set them as schema evolution properties
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));

}
 
Example #3
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0
/**
 * Helper method that configures the job to read transactional (ACID) tables. Prerequisite:
 * <i>job</i> already contains the table properties.
 * @param job
 */
public static void addACIDPropertiesIfNeeded(final JobConf job) {
  if (!AcidUtils.isTablePropertyTransactional(job)) {
    return;
  }

  AcidUtils.setAcidOperationalProperties(job, true, null);

  // Add ACID related properties
  if (Utilities.isSchemaEvolutionEnabled(job, true) &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
      job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
    // If the schema evolution columns and types are already set, then there is no additional conf to set.
    return;
  }

  // Get them from table properties and set them as schema evolution properties
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, job.get(serdeConstants.LIST_COLUMNS));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, job.get(serdeConstants.LIST_COLUMN_TYPES));

}
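
A hypothetical caller sketch for the helper above. The documented prerequisite is that the JobConf already carries the table properties, so the caller copies them in first and then lets the helper add the ACID and schema-evolution keys; the "transactional" property key and the column lists below are assumptions for illustration.

  // Hypothetical caller of addACIDPropertiesIfNeeded (assumes the metastore marks ACID
  // tables with the "transactional" table property, and that the table properties have
  // already been copied into the JobConf, e.g. via Utilities.copyTableJobPropertiesToConf).
  JobConf job = new JobConf();
  job.set("transactional", "true");                        // table property of an ACID table
  job.set(serdeConstants.LIST_COLUMNS, "id,name");         // assumed column names
  job.set(serdeConstants.LIST_COLUMN_TYPES, "int,string"); // assumed column types
  HiveUtilities.addACIDPropertiesIfNeeded(job);
  // The helper now copies the lists above into SCHEMA_EVOLUTION_COLUMNS and
  // SCHEMA_EVOLUTION_COLUMNS_TYPES.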
 
Example #4
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void pushdownTuple() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf);
        assertEquals(sConditions.size(), 1);
        AccumuloPredicateHandler.PushdownTuple tuple = new AccumuloPredicateHandler.PushdownTuple(sConditions.get(0));
        byte [] expectedVal = new byte[4];
        ByteBuffer.wrap(expectedVal).putInt(5);
        assertEquals(tuple.getConstVal(), expectedVal);
        assertEquals(tuple.getcOpt().getClass(), Equal.class);
        assertEquals(tuple.getpCompare().getClass(), IntCompare.class);
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example #5
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void getRowIDSearchCondition() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hi");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf);
        assertEquals(sConditions.size(), 1);
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #6
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test()
public void rangeEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #7
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void rangeGreaterThanOrEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertFalse(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #8
Source File: HoodieCombineHiveInputFormat.java    From hudi with Apache License 2.0
/**
 * MOD - Just added this for visibility.
 */
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    // On Tez we avoid duplicating the file info in FileInputFormat.
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
      try {
        List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
        dirs = paths.toArray(new Path[paths.size()]);
      } catch (Exception e) {
        throw new IOException("Could not create input files", e);
      }
    } else {
      throw new IOException("No input paths specified in job");
    }
  }
  return dirs;
}
 
Example #9
Source File: HoodieCombineHiveInputFormat.java    From hudi with Apache License 2.0
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;
  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = getPartitionFromPath(this.pathToPartitionInfo, ipaths[0],
          IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
 
Example #10
Source File: HoodieCombineHiveInputFormat.java    From hudi with Apache License 2.0
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = getPartitionFromPath(pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());

    // create a new InputFormat instance if this is the first time to see
    // this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  Text.writeString(out, inputFormatClassName);
  if (HoodieParquetRealtimeInputFormat.class.getName().equals(inputFormatClassName)) {
    // Write Shim Class Name
    Text.writeString(out, inputSplitShim.getClass().getName());
  }
  inputSplitShim.write(out);
}
 
Example #11
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void rangeLessThan() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertFalse(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertTrue(range.afterEndKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #12
Source File: Hive010Binding.java    From parquet-mr with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
    throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }

  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
 
Example #13
Source File: KryoSerializer.java    From spork with Apache License 2.0
public static byte[] serialize(Object object) {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    Output output = new Output(stream);

    Utilities.runtimeSerializationKryo.get().writeObject(output, object);

    output.close(); // close() also calls flush()
    return stream.toByteArray();
}
 
Example #14
Source File: AccumuloPredicateHandler.java    From accumulo-hive-storage-manager with Apache License 2.0
/**
 *
 * @param conf JobConf
 * @return list of IndexSearchConditions from the filter expression.
 */
public List<IndexSearchCondition> getSearchConditions(JobConf conf) {
    List<IndexSearchCondition> sConditions = Lists.newArrayList();
    String filteredExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if(filteredExprSerialized == null)
        return sConditions;
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filteredExprSerialized, conf);
    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
    if(residual != null)
        throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
    return sConditions;
}
 
Example #15
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test()
public void rangeGreaterThan() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertFalse(range.contains(new Key(new Text("aaa"))));
        assertFalse(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #16
Source File: Hive012Binding.java    From parquet-mr with Apache License 2.0
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {

    final TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
      LOG.debug("Not pushing filters because TableScanDesc is null");
      return;
    }

    // construct column name list for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);

    // push down filters
    final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
    if (filterExpr == null) {
      LOG.debug("Not pushing filters because FilterExpr is null");
      return;
    }

    final String filterText = filterExpr.getExprString();
    final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
    jobConf.set(
            TableScanDesc.FILTER_TEXT_CONF_STR,
            filterText);
    jobConf.set(
            TableScanDesc.FILTER_EXPR_CONF_STR,
            filterExprSerialized);
  }
 
Example #17
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void rangeLessThanOrEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertFalse(range.afterEndKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #18
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void multiRange() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 2);
        Iterator<Range> itr = ranges.iterator();
        Range range1 = itr.next();
        Range range2 = itr.next();
        assertNull(range1.clip(range2, true));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #19
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void iteratorIgnoreRowIDFields() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf);
        assertEquals(iterators.size() , 0);
    } catch (SerDeException e) {
        StringUtils.stringifyException(e);
    }
}
 
Example #20
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void ignoreIteratorPushdown() {
    setup();
    conf.set(serdeConstants.LIST_COLUMNS, "field1,field2,rid");
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
    conf.set(AccumuloSerde.COLUMN_MAPPINGS, "cf|f1,cf|f2,rowID");
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    conf.setBoolean(AccumuloSerde.NO_ITERATOR_PUSHDOWN, true);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf);
        assertEquals(iterators.size(), 0);
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example #21
Source File: HiveAuthzBindingHook.java    From incubator-sentry with Apache License 2.0
/**
 * Set the Serde URI privileges. If the URI privileges are not set, serdeURI will be null and
 * the URI authorization checks will be skipped.
 */
private void setSerdeURI(String serdeClassName) throws SemanticException {
  if (!serdeURIPrivilegesEnabled) {
    return;
  }

  // Whitelisted Serde jars can be used by any user. Whitelist checking is
  // done by comparing the Java package name. The assumption is that the cluster
  // admin will ensure there is no Java namespace collision,
  // e.g. org.apache.hadoop.hive.serde2 is used by Hive, and the cluster admin should
  // ensure no custom Serde class is introduced under the same namespace.
  if (!hasPrefixMatch(serdeWhiteList, serdeClassName)) {
    try {
      CodeSource serdeSrc = Class.forName(serdeClassName, true, Utilities.getSessionSpecifiedClassLoader()).getProtectionDomain().getCodeSource();
      if (serdeSrc == null) {
        throw new SemanticException("Could not resolve the jar for Serde class " + serdeClassName);
      }

      String serdeJar = serdeSrc.getLocation().getPath();
      if (serdeJar == null || serdeJar.isEmpty()) {
        throw new SemanticException("Could not find the jar for Serde class " + serdeClassName + "to validate privileges");
      }

      serdeURI = parseURI(serdeSrc.getLocation().toString(), true);
    } catch (ClassNotFoundException e) {
      throw new SemanticException("Error retrieving Serde class:" + e.getMessage(), e);
    }
  }
}
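
The whitelist check referenced in the comment above compares Java package-name prefixes. A minimal sketch of such a prefix matcher, under the assumption that this is roughly what hasPrefixMatch does (the real implementation in incubator-sentry may differ):

private static boolean hasPrefixMatch(List<String> prefixList, String className) {
  // Return true if the class name starts with any whitelisted package prefix.
  for (String prefix : prefixList) {
    if (className.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}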
 
Example #22
Source File: Hive010Binding.java    From parquet-mr with Apache License 2.0
private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) {

    final TableScanDesc scanDesc = tableScan.getConf();
    if (scanDesc == null) {
      LOG.debug("Not pushing filters because TableScanDesc is null");
      return;
    }

    // construct column name list for reference by filter push down
    Utilities.setColumnNameList(jobConf, tableScan);

    // push down filters
    final ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
    if (filterExpr == null) {
      LOG.debug("Not pushing filters because FilterExpr is null");
      return;
    }

    final String filterText = filterExpr.getExprString();
    final String filterExprSerialized = Utilities.serializeExpression(filterExpr);
    jobConf.set(
            TableScanDesc.FILTER_TEXT_CONF_STR,
            filterText);
    jobConf.set(
            TableScanDesc.FILTER_EXPR_CONF_STR,
            filterExprSerialized);
  }
 
Example #23
Source File: Hive010Binding.java    From parquet-mr with Apache License 2.0
/**
 * Initialize the mrwork variable in order to get all the partitions and start updating the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mrwork == null && plan != null && plan.length() > 0) {
    mrwork = Utilities.getMapRedWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mrwork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
 
Example #24
Source File: Hive012Binding.java    From parquet-mr with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
    throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }

  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
 
Example #25
Source File: Hive012Binding.java    From parquet-mr with Apache License 2.0
/**
 * Initialize the mapWork variable in order to get all the partitions and start updating the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mapWork == null && plan != null && plan.length() > 0) {
    mapWork = Utilities.getMapWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mapWork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
 
Example #26
Source File: CatalogThriftHiveMetastore.java    From metacat with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public PartitionsByExprResult get_partitions_by_expr(final PartitionsByExprRequest req) throws TException {
    return requestWrapper("get_partitions_by_expr", new Object[]{req},
        () -> {
            try {
                String filter = null;
                if (req.getExpr() != null) {
                    filter = Utilities.deserializeExpressionFromKryo(req.getExpr()).getExprString();
                    if (filter == null) {
                        throw new MetaException("Failed to deserialize expression - ExprNodeDesc not present");
                    }
                }
                //TODO: We need to handle the case for 'hasUnknownPartitions'
                return new PartitionsByExprResult(
                    getPartitionsByFilter(req.getDbName(), req.getTblName(), filter, req.getMaxParts()),
                    false);
            } catch (Exception e) {
                //
                // If there is an exception with filtering, fallback to getting all partition names and then
                // apply the filter.
                //
                final List<String> partitionNames = Lists.newArrayList(
                    get_partition_names(req.getDbName(), req.getTblName(), (short) -1));
                final Table table = get_table(req.getDbName(), req.getTblName());
                final List<String> columnNames = new ArrayList<>();
                final List<PrimitiveTypeInfo> typeInfos = new ArrayList<>();
                for (FieldSchema fs : table.getPartitionKeys()) {
                    columnNames.add(fs.getName());
                    typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
                }
                final boolean hasUnknownPartitions = new PartitionExpressionForMetastore().filterPartitionsByExpr(
                    columnNames, typeInfos, req.getExpr(), req.getDefaultPartitionName(), partitionNames);

                return new PartitionsByExprResult(get_partitions_by_names(
                    req.getDbName(), req.getTblName(), partitionNames), hasUnknownPartitions);
            }
        });
}
 
Example #27
Source File: KryoSerializer.java    From spork with Apache License 2.0
public static <T> T deserialize(byte[] buffer, Class<T> clazz) {
    return Utilities.runtimeSerializationKryo.get().readObject(
            new Input(new ByteArrayInputStream(buffer)), clazz);
}
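
Taken together with the serialize helper in Example #13, a hypothetical round trip through these two KryoSerializer methods looks like the sketch below. It assumes KryoSerializer is on the classpath and that the payload type can be handled by Hive's runtimeSerializationKryo instance; the class name and payload are made up for illustration.

import java.util.ArrayList;

public class KryoRoundTripDemo {
  public static void main(String[] args) {
    ArrayList<String> payload = new ArrayList<String>();
    payload.add("hello");
    payload.add("kryo");

    byte[] bytes = KryoSerializer.serialize(payload);                            // Example #13
    @SuppressWarnings("unchecked")
    ArrayList<String> copy = KryoSerializer.deserialize(bytes, ArrayList.class); // Example #27
    System.out.println(copy);
  }
}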
 
Example #28
Source File: AwsGlueSparkHiveShims.java    From aws-glue-data-catalog-client-for-apache-hive-metastore with Apache License 2.0
@Override
public ExprNodeGenericFuncDesc getDeserializeExpression(byte[] exprBytes) {
  return Utilities.deserializeExpressionFromKryo(exprBytes);
}
 
Example #29
Source File: CopybookRecordReader.java    From CopybookInputFormat with Apache License 2.0
public CopybookRecordReader(FileSplit genericSplit, JobConf job)
    throws IOException {
  try {
    String cblPath = job.get(Const.COPYBOOK_INPUTFORMAT_CBL_HDFS_PATH_CONF);

    if (cblPath == null) {
      if (job != null) {
        MapWork mrwork = Utilities.getMapWork(job);

        if (mrwork == null) {
          System.out.println("When running a client side hive job you have to set \"copybook.inputformat.cbl.hdfs.path\" before executing the query.");
          System.out.println("When running a MR job we can get this from the hive TBLProperties");
        } else {
          // Fall back to the table properties carried in the MapWork.
          Map<String, PartitionDesc> map = mrwork.getPathToPartitionInfo();
          for (Map.Entry<String, PartitionDesc> pathsAndParts : map.entrySet()) {
            Properties props = pathsAndParts.getValue().getProperties();
            cblPath = props
                .getProperty(Const.COPYBOOK_INPUTFORMAT_CBL_HDFS_PATH_CONF);
            break;
          }
        }
      }
    }

    FileSystem fs = FileSystem.get(job);
    BufferedInputStream inputStream = new BufferedInputStream(
        fs.open(new Path(cblPath)));
    CobolCopybookLoader copybookInt = new CobolCopybookLoader();
    externalRecord = copybookInt
        .loadCopyBook(inputStream, "RR", CopybookLoader.SPLIT_NONE, 0,
            "cp037", Convert.FMT_MAINFRAME, 0, null);

    int fileStructure = Constants.IO_FIXED_LENGTH;

    for (ExternalField field : externalRecord.getRecordFields()) {
      recordByteLength += field.getLen();
    }

    // Jump to the point in the split where the first whole record starts.
    FileSplit split = (FileSplit) genericSplit;

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    BufferedInputStream fileIn = new BufferedInputStream(fs.open(split
        .getPath()));

    if (start != 0) {
      pos = start - (start % recordByteLength) + recordByteLength;

      fileIn.skip(pos);
    }

    ret = LineIOProvider.getInstance().getLineReader(
        fileStructure,
        LineIOProvider.getInstance().getLineProvider(fileStructure));

    ret.open(fileIn, externalRecord);
  } catch (Exception e) {
    e.printStackTrace();
  } 

}
 
Example #30
Source File: TestHoodieCombineHiveInputFormat.java    From hudi with Apache License 2.0
@Test
@Disabled
public void testHoodieRealtimeCombineHoodieInputFormat() throws Exception {

  Configuration conf = new Configuration();
  // initial commit
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
  HoodieTestUtils.init(hadoopConf, tempDir.toAbsolutePath().toString(), HoodieTableType.MERGE_ON_READ);
  String commitTime = "100";
  final int numRecords = 1000;
  // Create 3 parquet files with 1000 records each
  File partitionDir = InputFormatTestUtil.prepareParquetTable(tempDir, schema, 3, numRecords, commitTime);
  InputFormatTestUtil.commit(tempDir, commitTime);

  // insert 1000 update records to log file 0
  String newCommitTime = "101";
  HoodieLogFormat.Writer writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 1
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 2
  writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid2", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();

  TableDesc tblDesc = Utilities.defaultTd;
  // Set the input format
  tblDesc.setInputFileFormatClass(HoodieCombineHiveInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path(tempDir.toAbsolutePath().toString()), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path(tempDir.toAbsolutePath().toString());
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  jobConf = new JobConf(conf);
  // Add the paths
  FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
  jobConf.set(HAS_MAP_WORK, "true");
  // The following config tells Hive to choose ExecMapper to read the MAP_WORK
  jobConf.set(MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
  // setting the split size to be 3 to create one split for 3 file groups
  jobConf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, "3");

  HoodieCombineHiveInputFormat combineHiveInputFormat = new HoodieCombineHiveInputFormat();
  String tripsHiveColumnTypes = "double,string,string,string,double,double,double,double,double";
  InputFormatTestUtil.setPropsForInputFormat(jobConf, schema, tripsHiveColumnTypes);
  InputSplit[] splits = combineHiveInputFormat.getSplits(jobConf, 1);
  // Since the SPLIT_SIZE is 3, we should create only 1 split with all 3 file groups
  assertEquals(1, splits.length);
  RecordReader<NullWritable, ArrayWritable> recordReader =
      combineHiveInputFormat.getRecordReader(splits[0], jobConf, null);
  NullWritable nullWritable = recordReader.createKey();
  ArrayWritable arrayWritable = recordReader.createValue();
  int counter = 0;
  while (recordReader.next(nullWritable, arrayWritable)) {
    // read over all the splits
    counter++;
  }
  // Should read 3000 records in total: 1000 each from file0, file1 and file2 within the single combined split
  assertEquals(3000, counter);
}