Java Code Examples for org.apache.hadoop.hive.ql.hooks.WriteEntity#getType()

The following examples show how to use org.apache.hadoop.hive.ql.hooks.WriteEntity#getType(). Each example notes the source file, the project it comes from, and its license.
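
Before the full examples, here is a minimal sketch of the pattern they all share: iterate over a query's outputs and branch on each WriteEntity's Entity.Type. The class and helper method names in this sketch are illustrative assumptions, not taken from any of the projects below; only the Hive types and enum constants come from the examples themselves.

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class WriteEntityTypeSketch {

    // Illustrative helper (hypothetical name): decides whether a set of query
    // outputs writes only to tables or partitions, i.e. contains none of the
    // DFS/local directory outputs that hooks commonly filter out.
    public static boolean writesOnlyToTablesOrPartitions(Set<WriteEntity> outputs) {
        for (WriteEntity output : outputs) {
            Entity.Type type = output.getType();

            if (type == Entity.Type.DFS_DIR || type == Entity.Type.LOCAL_DIR) {
                return false;
            }
        }

        return true;
    }
}

Examples 1 and 2 below build on exactly this type check, combining it with WriteEntity#getWriteType() and isTempURI() to recognize select queries.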
Example 1
Source File: HiveHook.java    From incubator-atlas with Apache License 2.0
private boolean isSelectQuery(HiveEventContext event) {
    if (event.getOperation() == HiveOperation.QUERY) {
        // A select query has only one output
        if (event.getOutputs().size() == 1) {
            WriteEntity output = event.getOutputs().iterator().next();
            /* Strangely, select queries have DFS_DIR as the output type, which looks like a bug in Hive. Filter them
             * out by checking whether the path is a temporary URI.
             * Insert into/overwrite queries onto local or DFS paths have DFS_DIR or LOCAL_DIR as the type, with
             * WriteType.PATH_WRITE and tempUri = false.
             * An insert into a temporary table has isTempURI = false, so it will not be skipped, as expected.
             */
            if (output.getType() == Type.DFS_DIR || output.getType() == Type.LOCAL_DIR) {
                if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE &&
                    output.isTempURI()) {
                    return true;
                }
            }
        }
    }
    return false;
}
 
Example 2
Source File: CreateHiveProcess.java    From atlas with Apache License 2.0
private boolean skipProcess() {
    Set<ReadEntity>  inputs  = getInputs();
    Set<WriteEntity> outputs = getOutputs();

    boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);

    if (!ret) {
        if (getContext().getHiveOperation() == HiveOperation.QUERY) {
            // Select query has only one output
            if (outputs.size() == 1) {
                WriteEntity output = outputs.iterator().next();

                if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
                    if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
                        ret = true;
                    }
                }
                // DELETE and UPDATE initially have one input and one output.
                // Since they do not support sub-queries, they won't create lineage that has both an input and an
                // output (one input only), so it is safe to filter them out here.
                if (output.getWriteType() == WriteEntity.WriteType.DELETE || output.getWriteType() == WriteEntity.WriteType.UPDATE) {
                    ret = true;
                }
            }
        }
    }

    return ret;
}
 
Example 3
Source File: HiveITBase.java    From atlas with Apache License 2.0
protected static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, AtlasEntity> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        Set<String> dataSetsProcessed = new LinkedHashSet<>();
        if (sortedOutputs != null) {
            for (WriteEntity output : sortedOutputs) {
                final Entity entity = output;
                if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
                    if (ignoreHDFSPathsInQFName &&
                            (Entity.Type.DFS_DIR.equals(output.getType()) || Entity.Type.LOCAL_DIR.equals(output.getType()))) {
                        LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
                    } else if (refs.containsKey(output)) {
                        //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                        if (addQueryType(op, (WriteEntity) entity)) {
                            buffer.append(SEP);
                            buffer.append(((WriteEntity) entity).getWriteType().name());
                        }

                        if (output.getType() == Entity.Type.PARTITION || output.getType() == Entity.Type.TABLE) {
                            Table outputTable = refreshTable(hiveBridge, output.getTable().getDbName(), output.getTable().getTableName());

                            if (outputTable != null) {
                                addDataset(buffer, refs.get(output), HiveMetaStoreBridge.getTableCreatedTime(outputTable));
                            }
                        } else {
                            addDataset(buffer, refs.get(output));
                        }
                    }

                    dataSetsProcessed.add(output.getName().toLowerCase());
                }
            }
        }
    }
}
 
Example 4
Source File: HiveHook.java    From incubator-atlas with Apache License 2.0
private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        Set<String> dataSetsProcessed = new LinkedHashSet<>();
        if (sortedOutputs != null) {
            for (WriteEntity output : sortedOutputs) {
                final Entity entity = output;
                if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
                    //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                    if (addQueryType(op, (WriteEntity) entity)) {
                        buffer.append(SEP);
                        buffer.append(((WriteEntity) entity).getWriteType().name());
                    }
                    if (ignoreHDFSPathsInQFName &&
                        (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
                        LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
                    } else if (refs.containsKey(output)) {
                        if (output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
                            final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(hiveBridge.hiveClient.getTable(output.getTable().getDbName(), output.getTable().getTableName()));
                            addDataset(buffer, refs.get(output), createTime.getTime());
                        } else {
                            addDataset(buffer, refs.get(output));
                        }
                    }
                    dataSetsProcessed.add(output.getName().toLowerCase());
                }
            }
        }
    }
}
 
Example 5
Source File: AlterTableRename.java    From atlas with Apache License 2.0
public List<HookNotification> getHiveMessages() throws Exception {
    List<HookNotification> ret = new ArrayList<>();
    Table oldTable;
    Table newTable;

    if (CollectionUtils.isEmpty(getInputs())) {
        LOG.error("AlterTableRename: old-table not found in inputs list");

        return ret;
    }

    oldTable = getInputs().iterator().next().getTable();
    newTable = null;

    if (CollectionUtils.isNotEmpty(getOutputs())) {
        for (WriteEntity entity : getOutputs()) {
            if (entity.getType() == Entity.Type.TABLE) {
                newTable = entity.getTable();

                // Hive includes both the old and the new table name in the outputs, which is odd; skip the old name with the check below
                if (StringUtils.equalsIgnoreCase(newTable.getDbName(), oldTable.getDbName()) &&
                        StringUtils.equalsIgnoreCase(newTable.getTableName(), oldTable.getTableName())) {
                    newTable = null;

                    continue;
                }

                newTable = getHive().getTable(newTable.getDbName(), newTable.getTableName());

                break;
            }
        }
    }

    if (newTable == null) {
        LOG.error("AlterTableRename: renamed table not found in outputs list");

        return ret;
    }

    processTables(oldTable, newTable, ret);

    return ret;
}