Java Code Examples for org.apache.kylin.common.util.HadoopUtil#parseHiveTableName()

The following examples show how to use org.apache.kylin.common.util.HadoopUtil#parseHiveTableName(). Each example is taken from an open source project; the source file and project are noted above the code.
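As a quick orientation before the examples, the sketch below shows the calling pattern they all share: parseHiveTableName takes a table name that may or may not be qualified with a database, and returns a two-element array with the database at index 0 and the table at index 1. The fallback behavior for an unqualified name (resolving to Hive's default database) is an assumption for illustration; confirm the exact behavior against HadoopUtil in your Kylin version.

import org.apache.kylin.common.util.HadoopUtil;

public class ParseHiveTableNameSketch {
    public static void main(String[] args) {
        // Qualified name: parts[0] = database, parts[1] = table,
        // which is how every example below indexes the result.
        String[] parts = HadoopUtil.parseHiveTableName("my_database_name.my_table_name");
        System.out.println(parts[0] + " . " + parts[1]);

        // Unqualified name: assumed to resolve to Hive's default database
        // (an assumption, not documented behavior; verify for your version).
        String[] unqualified = HadoopUtil.parseHiveTableName("my_table_name");
        System.out.println(unqualified[0] + " . " + unqualified[1]);
    }
}
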
Example 1
Source File: TableService.java    From kylin-on-parquet-v2 with Apache License 2.0
public List<Pair<TableDesc, TableExtDesc>> extractHiveTableMeta(String[] tables, String project) throws Exception {
    // de-dup: group the requested tables by database, dropping duplicates
    SetMultimap<String, String> db2tables = LinkedHashMultimap.create();
    for (String fullTableName : tables) {
        String[] parts = HadoopUtil.parseHiveTableName(fullTableName);
        db2tables.put(parts[0], parts[1]);
    }

    // load all tables first
    List<Pair<TableDesc, TableExtDesc>> allMeta = Lists.newArrayList();
    ProjectInstance projectInstance = getProjectManager().getProject(project);
    ISourceMetadataExplorer explr = SourceManager.getSource(projectInstance).getSourceMetadataExplorer();
    for (Map.Entry<String, String> entry : db2tables.entries()) {
        Pair<TableDesc, TableExtDesc> pair = explr.loadTableMetadata(entry.getKey(), entry.getValue(), project);
        TableDesc tableDesc = pair.getFirst();
        Preconditions.checkState(tableDesc.getDatabase().equals(entry.getKey().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getName().equals(entry.getValue().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getIdentity()
                .equals(entry.getKey().toUpperCase(Locale.ROOT) + "." + entry.getValue().toUpperCase(Locale.ROOT)));
        TableExtDesc extDesc = pair.getSecond();
        Preconditions.checkState(tableDesc.getIdentity().equals(extDesc.getIdentity()));
        allMeta.add(pair);
    }
    return allMeta;
}
 
Example 2
Source File: StreamingControllerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testReadTableDesc() throws IOException {
    String requestTableData = "{\"name\":\"my_table_name\",\"source_type\":1,\"columns\":[{\"id\":1,\"name\":"
            + "\"amount\",\"datatype\":\"decimal\"},{\"id\":2,\"name\":\"category\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":3,\"name\":\"order_time\",\"datatype\":\"timestamp\"},{\"id\":4,\"name\":\"device\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":5,\"name\":\"qty\",\"datatype\":\"int\"},{\"id\":6,\"name\":"
            + "\"user_id\",\"datatype\":\"varchar(256)\"},{\"id\":7,\"name\":\"user_age\",\"datatype\":\"int\"},"
            + "{\"id\":8,\"name\":\"user_gender\",\"datatype\":\"varchar(256)\"},{\"id\":9,\"name\":\"currency\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":10,\"name\":\"country\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":11,\"name\":\"year_start\",\"datatype\":\"date\"},{\"id\":12,\"name\":\"quarter_start\","
            + "\"datatype\":\"date\"},{\"id\":13,\"name\":\"month_start\",\"datatype\":\"date\"},{\"id\":14,"
            + "\"name\":\"week_start\",\"datatype\":\"date\"},{\"id\":15,\"name\":\"day_start\",\"datatype\":"
            + "\"date\"},{\"id\":16,\"name\":\"hour_start\",\"datatype\":\"timestamp\"},{\"id\":17,\"name\":"
            + "\"minute_start\",\"datatype\":\"timestamp\"}],\"database\":\"my_database_name\"}";
    TableDesc desc = JsonUtil.readValue(requestTableData, TableDesc.class);
    String[] dbTable = HadoopUtil.parseHiveTableName(desc.getIdentity());
    desc.setName(dbTable[1]);
    desc.setDatabase(dbTable[0]);
    Assert.assertEquals("my_table_name".toUpperCase(Locale.ROOT), desc.getName());
    Assert.assertEquals("my_database_name".toUpperCase(Locale.ROOT), desc.getDatabase());
}
 
Example 3
Source File: TableService.java    From kylin with Apache License 2.0
public List<Pair<TableDesc, TableExtDesc>> extractHiveTableMeta(String[] tables, String project) throws Exception {
    // de-dup: group the requested tables by database, dropping duplicates
    SetMultimap<String, String> db2tables = LinkedHashMultimap.create();
    for (String fullTableName : tables) {
        String[] parts = HadoopUtil.parseHiveTableName(fullTableName);
        db2tables.put(parts[0], parts[1]);
    }

    // load all tables first
    List<Pair<TableDesc, TableExtDesc>> allMeta = Lists.newArrayList();
    ProjectInstance projectInstance = getProjectManager().getProject(project);
    ISourceMetadataExplorer explr = SourceManager.getSource(projectInstance).getSourceMetadataExplorer();
    for (Map.Entry<String, String> entry : db2tables.entries()) {
        Pair<TableDesc, TableExtDesc> pair = explr.loadTableMetadata(entry.getKey(), entry.getValue(), project);
        TableDesc tableDesc = pair.getFirst();
        Preconditions.checkState(tableDesc.getDatabase().equals(entry.getKey().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getName().equals(entry.getValue().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getIdentity()
                .equals(entry.getKey().toUpperCase(Locale.ROOT) + "." + entry.getValue().toUpperCase(Locale.ROOT)));
        TableExtDesc extDesc = pair.getSecond();
        Preconditions.checkState(tableDesc.getIdentity().equals(extDesc.getIdentity()));
        allMeta.add(pair);
    }
    return allMeta;
}
 
Example 4
Source File: StreamingControllerTest.java    From kylin with Apache License 2.0
@Test
public void testReadTableDesc() throws IOException {
    String requestTableData = "{\"name\":\"my_table_name\",\"source_type\":1,\"columns\":[{\"id\":1,\"name\":"
            + "\"amount\",\"datatype\":\"decimal\"},{\"id\":2,\"name\":\"category\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":3,\"name\":\"order_time\",\"datatype\":\"timestamp\"},{\"id\":4,\"name\":\"device\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":5,\"name\":\"qty\",\"datatype\":\"int\"},{\"id\":6,\"name\":"
            + "\"user_id\",\"datatype\":\"varchar(256)\"},{\"id\":7,\"name\":\"user_age\",\"datatype\":\"int\"},"
            + "{\"id\":8,\"name\":\"user_gender\",\"datatype\":\"varchar(256)\"},{\"id\":9,\"name\":\"currency\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":10,\"name\":\"country\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":11,\"name\":\"year_start\",\"datatype\":\"date\"},{\"id\":12,\"name\":\"quarter_start\","
            + "\"datatype\":\"date\"},{\"id\":13,\"name\":\"month_start\",\"datatype\":\"date\"},{\"id\":14,"
            + "\"name\":\"week_start\",\"datatype\":\"date\"},{\"id\":15,\"name\":\"day_start\",\"datatype\":"
            + "\"date\"},{\"id\":16,\"name\":\"hour_start\",\"datatype\":\"timestamp\"},{\"id\":17,\"name\":"
            + "\"minute_start\",\"datatype\":\"timestamp\"}],\"database\":\"my_database_name\"}";
    TableDesc desc = JsonUtil.readValue(requestTableData, TableDesc.class);
    String[] dbTable = HadoopUtil.parseHiveTableName(desc.getIdentity());
    desc.setName(dbTable[1]);
    desc.setDatabase(dbTable[0]);
    Assert.assertEquals("my_table_name".toUpperCase(Locale.ROOT), desc.getName());
    Assert.assertEquals("my_database_name".toUpperCase(Locale.ROOT), desc.getDatabase());
}
 
Example 5
Source File: HiveSourceTableLoader.java    From Kylin with Apache License 2.0
public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {

    Map<String, Set<String>> db2tables = Maps.newHashMap();
    for (String table : hiveTables) {
        String[] parts = HadoopUtil.parseHiveTableName(table);
        Set<String> set = db2tables.get(parts[0]);
        if (set == null) {
            set = Sets.newHashSet();
            db2tables.put(parts[0], set);
        }
        set.add(parts[1]);
    }

    // extract from hive
    Set<String> loadedTables = Sets.newHashSet();
    for (String database : db2tables.keySet()) {
        List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
        loadedTables.addAll(loaded);
    }

    return loadedTables;
}
 
Example 6
Source File: FactDistinctColumnsJob.java    From Kylin with Apache License 2.0
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
 
Example 7
Source File: InvertedIndexJob.java    From Kylin with Apache License 2.0
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
 
Example 8
Source File: CubeService.java    From Kylin with Apache License 2.0
/**
 * Generate cardinality for a table. This triggers a Hadoop job;
 * the result is merged into the table's exd info.
 *
 * @param tableName the Hive table name, optionally qualified as "database.table"
 * @param submitter the user submitting the cardinality job
 */
public void calculateCardinality(String tableName, String submitter) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    tableName = dbTableName[0] + "." + dbTableName[1];
    TableDesc table = getMetadataManager().getTableDesc(tableName);
    final Map<String, String> tableExd = getMetadataManager().getTableDescExd(tableName);
    if (tableExd == null || table == null) {
        IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
        logger.error("Cannot find table descriptor " + tableName, e);
        throw e;
    }

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
    job.setSubmitter(submitter);

    String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName;
    String param = "-table " + tableName + " -output " + outPath;

    HadoopShellExecutable step1 = new HadoopShellExecutable();

    step1.setJobClass(HiveColumnCardinalityJob.class);
    step1.setJobParams(param);

    job.addTask(step1);

    HadoopShellExecutable step2 = new HadoopShellExecutable();

    step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
    step2.setJobParams(param);
    job.addTask(step2);

    getExecutableManager().addJob(job);
}
 
Example 9
Source File: HiveMRInput.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Construct a HiveTableInputFormat to read a Hive table.
 * @param fullQualifiedTableName "databaseName.tableName"
 */
public HiveTableInputFormat(String fullQualifiedTableName) {
    String[] parts = HadoopUtil.parseHiveTableName(fullQualifiedTableName);
    dbName = parts[0];
    tableName = parts[1];
}
 
Example 10
Source File: TableService.java    From kylin-on-parquet-v2 with Apache License 2.0
public String normalizeHiveTableName(String tableName) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    return (dbTableName[0] + "." + dbTableName[1]).toUpperCase(Locale.ROOT);
}
 
Example 11
Source File: HiveMRInput.java    From kylin with Apache License 2.0
/**
 * Construct a HiveTableInputFormat to read a Hive table.
 * @param fullQualifiedTableName "databaseName.tableName"
 */
public HiveTableInputFormat(String fullQualifiedTableName) {
    String[] parts = HadoopUtil.parseHiveTableName(fullQualifiedTableName);
    dbName = parts[0];
    tableName = parts[1];
}
 
Example 12
Source File: TableService.java    From kylin with Apache License 2.0
public String normalizeHiveTableName(String tableName) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    return (dbTableName[0] + "." + dbTableName[1]).toUpperCase(Locale.ROOT);
}
 
Example 13
Source File: HiveColumnCardinalityJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {

    Options options = new Options();

    try {
        options.addOption(OPTION_TABLE);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);

        // start job
        String jobName = JOB_TITLE + getOptionsAsString();
        System.out.println("Starting: " + jobName);
        Configuration conf = getConf();
        job = Job.getInstance(conf, jobName);

        setJobClasspath(job);
        
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        String table = getOptionValue(OPTION_TABLE);
        String[] dbTableNames = HadoopUtil.parseHiveTableName(table);
        HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

        job.setInputFormatClass(HCatInputFormat.class);
        job.setMapperClass(ColumnCardinalityMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(BytesWritable.class);

        // Reducer - only one
        job.setReducerClass(ColumnCardinalityReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        System.out.println("Going to submit HiveColumnCardinalityJob for table '" + table + "'");
        int result = waitForCompletion(job);

        return result;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }

}
 
Example 14
Source File: IIDistinctColumnsJob.java    From Kylin with Apache License 2.0
private void setupMapper() throws IOException {

    String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

    log.info("setting hcat input format, db name {} , table name {}", dbTableNames[0], dbTableNames[1]);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(IIDistinctColumnsMapper.class);
    job.setCombinerClass(IIDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}