Java Code Examples for org.apache.kylin.common.util.HadoopUtil#parseHiveTableName()

The following examples show how to use org.apache.kylin.common.util.HadoopUtil#parseHiveTableName(). Each example is taken from an open source project; the source file and project are noted above the code.
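As a quick orientation before the examples, the sketch below shows the calling pattern they all share: parseHiveTableName takes a table name that may or may not be qualified with a database, and returns a two-element array with the database at index 0 and the table at index 1. The fallback behavior for an unqualified name (resolving to Hive's default database) is an assumption for illustration; confirm the exact behavior against HadoopUtil in your Kylin version.

import org.apache.kylin.common.util.HadoopUtil;

public class ParseHiveTableNameSketch {
    public static void main(String[] args) {
        // Qualified name: parts[0] = database, parts[1] = table,
        // which is how every example below indexes the result.
        String[] parts = HadoopUtil.parseHiveTableName("my_database_name.my_table_name");
        System.out.println(parts[0] + " . " + parts[1]);

        // Unqualified name: assumed to resolve to Hive's default database
        // (an assumption, not documented behavior; verify for your version).
        String[] unqualified = HadoopUtil.parseHiveTableName("my_table_name");
        System.out.println(unqualified[0] + " . " + unqualified[1]);
    }
}
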
Example 1
Source File: TableService.java    From kylin-on-parquet-v2 with Apache License 2.0
public List<Pair<TableDesc, TableExtDesc>> extractHiveTableMeta(String[] tables, String project) throws Exception {
    // de-dup: group the requested tables by database, dropping duplicates
    SetMultimap<String, String> db2tables = LinkedHashMultimap.create();
    for (String fullTableName : tables) {
        String[] parts = HadoopUtil.parseHiveTableName(fullTableName);
        db2tables.put(parts[0], parts[1]);
    }

    // load all tables first
    List<Pair<TableDesc, TableExtDesc>> allMeta = Lists.newArrayList();
    ProjectInstance projectInstance = getProjectManager().getProject(project);
    ISourceMetadataExplorer explr = SourceManager.getSource(projectInstance).getSourceMetadataExplorer();
    for (Map.Entry<String, String> entry : db2tables.entries()) {
        Pair<TableDesc, TableExtDesc> pair = explr.loadTableMetadata(entry.getKey(), entry.getValue(), project);
        TableDesc tableDesc = pair.getFirst();
        Preconditions.checkState(tableDesc.getDatabase().equals(entry.getKey().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getName().equals(entry.getValue().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getIdentity()
                .equals(entry.getKey().toUpperCase(Locale.ROOT) + "." + entry.getValue().toUpperCase(Locale.ROOT)));
        TableExtDesc extDesc = pair.getSecond();
        Preconditions.checkState(tableDesc.getIdentity().equals(extDesc.getIdentity()));
        allMeta.add(pair);
    }
    return allMeta;
}
 
Example 2
Source File: StreamingControllerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testReadTableDesc() throws IOException {
    String requestTableData = "{\"name\":\"my_table_name\",\"source_type\":1,\"columns\":[{\"id\":1,\"name\":"
            + "\"amount\",\"datatype\":\"decimal\"},{\"id\":2,\"name\":\"category\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":3,\"name\":\"order_time\",\"datatype\":\"timestamp\"},{\"id\":4,\"name\":\"device\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":5,\"name\":\"qty\",\"datatype\":\"int\"},{\"id\":6,\"name\":"
            + "\"user_id\",\"datatype\":\"varchar(256)\"},{\"id\":7,\"name\":\"user_age\",\"datatype\":\"int\"},"
            + "{\"id\":8,\"name\":\"user_gender\",\"datatype\":\"varchar(256)\"},{\"id\":9,\"name\":\"currency\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":10,\"name\":\"country\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":11,\"name\":\"year_start\",\"datatype\":\"date\"},{\"id\":12,\"name\":\"quarter_start\","
            + "\"datatype\":\"date\"},{\"id\":13,\"name\":\"month_start\",\"datatype\":\"date\"},{\"id\":14,"
            + "\"name\":\"week_start\",\"datatype\":\"date\"},{\"id\":15,\"name\":\"day_start\",\"datatype\":"
            + "\"date\"},{\"id\":16,\"name\":\"hour_start\",\"datatype\":\"timestamp\"},{\"id\":17,\"name\":"
            + "\"minute_start\",\"datatype\":\"timestamp\"}],\"database\":\"my_database_name\"}";
    TableDesc desc = JsonUtil.readValue(requestTableData, TableDesc.class);
    String[] dbTable = HadoopUtil.parseHiveTableName(desc.getIdentity());
    desc.setName(dbTable[1]);
    desc.setDatabase(dbTable[0]);
    Assert.assertEquals("my_table_name".toUpperCase(Locale.ROOT), desc.getName());
    Assert.assertEquals("my_database_name".toUpperCase(Locale.ROOT), desc.getDatabase());
}
 
Example 3
Source File: TableService.java    From kylin with Apache License 2.0
public List<Pair<TableDesc, TableExtDesc>> extractHiveTableMeta(String[] tables, String project) throws Exception {
    // de-dup: group the requested tables by database, dropping duplicates
    SetMultimap<String, String> db2tables = LinkedHashMultimap.create();
    for (String fullTableName : tables) {
        String[] parts = HadoopUtil.parseHiveTableName(fullTableName);
        db2tables.put(parts[0], parts[1]);
    }

    // load all tables first
    List<Pair<TableDesc, TableExtDesc>> allMeta = Lists.newArrayList();
    ProjectInstance projectInstance = getProjectManager().getProject(project);
    ISourceMetadataExplorer explr = SourceManager.getSource(projectInstance).getSourceMetadataExplorer();
    for (Map.Entry<String, String> entry : db2tables.entries()) {
        Pair<TableDesc, TableExtDesc> pair = explr.loadTableMetadata(entry.getKey(), entry.getValue(), project);
        TableDesc tableDesc = pair.getFirst();
        Preconditions.checkState(tableDesc.getDatabase().equals(entry.getKey().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getName().equals(entry.getValue().toUpperCase(Locale.ROOT)));
        Preconditions.checkState(tableDesc.getIdentity()
                .equals(entry.getKey().toUpperCase(Locale.ROOT) + "." + entry.getValue().toUpperCase(Locale.ROOT)));
        TableExtDesc extDesc = pair.getSecond();
        Preconditions.checkState(tableDesc.getIdentity().equals(extDesc.getIdentity()));
        allMeta.add(pair);
    }
    return allMeta;
}
 
Example 4
Source File: StreamingControllerTest.java    From kylin with Apache License 2.0
@Test
public void testReadTableDesc() throws IOException {
    String requestTableData = "{\"name\":\"my_table_name\",\"source_type\":1,\"columns\":[{\"id\":1,\"name\":"
            + "\"amount\",\"datatype\":\"decimal\"},{\"id\":2,\"name\":\"category\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":3,\"name\":\"order_time\",\"datatype\":\"timestamp\"},{\"id\":4,\"name\":\"device\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":5,\"name\":\"qty\",\"datatype\":\"int\"},{\"id\":6,\"name\":"
            + "\"user_id\",\"datatype\":\"varchar(256)\"},{\"id\":7,\"name\":\"user_age\",\"datatype\":\"int\"},"
            + "{\"id\":8,\"name\":\"user_gender\",\"datatype\":\"varchar(256)\"},{\"id\":9,\"name\":\"currency\","
            + "\"datatype\":\"varchar(256)\"},{\"id\":10,\"name\":\"country\",\"datatype\":\"varchar(256)\"},"
            + "{\"id\":11,\"name\":\"year_start\",\"datatype\":\"date\"},{\"id\":12,\"name\":\"quarter_start\","
            + "\"datatype\":\"date\"},{\"id\":13,\"name\":\"month_start\",\"datatype\":\"date\"},{\"id\":14,"
            + "\"name\":\"week_start\",\"datatype\":\"date\"},{\"id\":15,\"name\":\"day_start\",\"datatype\":"
            + "\"date\"},{\"id\":16,\"name\":\"hour_start\",\"datatype\":\"timestamp\"},{\"id\":17,\"name\":"
            + "\"minute_start\",\"datatype\":\"timestamp\"}],\"database\":\"my_database_name\"}";
    TableDesc desc = JsonUtil.readValue(requestTableData, TableDesc.class);
    String[] dbTable = HadoopUtil.parseHiveTableName(desc.getIdentity());
    desc.setName(dbTable[1]);
    desc.setDatabase(dbTable[0]);
    Assert.assertEquals("my_table_name".toUpperCase(Locale.ROOT), desc.getName());
    Assert.assertEquals("my_database_name".toUpperCase(Locale.ROOT), desc.getDatabase());
}
 
Example 5
Source File: HiveSourceTableLoader.java    From Kylin with Apache License 2.0
public static Set<String> reloadHiveTables(String[] hiveTables, KylinConfig config) throws IOException {

    Map<String, Set<String>> db2tables = Maps.newHashMap();
    for (String table : hiveTables) {
        String[] parts = HadoopUtil.parseHiveTableName(table);
        Set<String> set = db2tables.get(parts[0]);
        if (set == null) {
            set = Sets.newHashSet();
            db2tables.put(parts[0], set);
        }
        set.add(parts[1]);
    }

    // extract from hive
    Set<String> loadedTables = Sets.newHashSet();
    for (String database : db2tables.keySet()) {
        List<String> loaded = extractHiveTables(database, db2tables.get(database), config);
        loadedTables.addAll(loaded);
    }

    return loadedTables;
}
 
Example 6
Source File: FactDistinctColumnsJob.java    From Kylin with Apache License 2.0
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
 
Example 7
Source File: InvertedIndexJob.java    From Kylin with Apache License 2.0
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
 
Example 8
Source File: CubeService.java    From Kylin with Apache License 2.0
/**
 * Generate cardinality for a table. This triggers a Hadoop job;
 * the result is merged into the table's exd info.
 *
 * @param tableName the Hive table name, optionally qualified as "database.table"
 * @param submitter the user submitting the cardinality job
 */
public void calculateCardinality(String tableName, String submitter) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    tableName = dbTableName[0] + "." + dbTableName[1];
    TableDesc table = getMetadataManager().getTableDesc(tableName);
    final Map<String, String> tableExd = getMetadataManager().getTableDescExd(tableName);
    if (tableExd == null || table == null) {
        IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
        logger.error("Cannot find table descriptor " + tableName, e);
        throw e;
    }

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
    job.setSubmitter(submitter);

    String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName;
    String param = "-table " + tableName + " -output " + outPath;

    HadoopShellExecutable step1 = new HadoopShellExecutable();

    step1.setJobClass(HiveColumnCardinalityJob.class);
    step1.setJobParams(param);

    job.addTask(step1);

    HadoopShellExecutable step2 = new HadoopShellExecutable();

    step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
    step2.setJobParams(param);
    job.addTask(step2);

    getExecutableManager().addJob(job);
}
 
Example 9
Source File: HiveMRInput.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Construct a HiveTableInputFormat to read a Hive table.
 * @param fullQualifiedTableName "databaseName.tableName"
 */
public HiveTableInputFormat(String fullQualifiedTableName) {
    String[] parts = HadoopUtil.parseHiveTableName(fullQualifiedTableName);
    dbName = parts[0];
    tableName = parts[1];
}
 
Example 10
Source File: TableService.java    From kylin-on-parquet-v2 with Apache License 2.0
public String normalizeHiveTableName(String tableName) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    return (dbTableName[0] + "." + dbTableName[1]).toUpperCase(Locale.ROOT);
}
 
Example 11
Source File: HiveMRInput.java    From kylin with Apache License 2.0
/**
 * Construct a HiveTableInputFormat to read a Hive table.
 * @param fullQualifiedTableName "databaseName.tableName"
 */
public HiveTableInputFormat(String fullQualifiedTableName) {
    String[] parts = HadoopUtil.parseHiveTableName(fullQualifiedTableName);
    dbName = parts[0];
    tableName = parts[1];
}
 
Example 12
Source File: TableService.java    From kylin with Apache License 2.0
public String normalizeHiveTableName(String tableName) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    return (dbTableName[0] + "." + dbTableName[1]).toUpperCase(Locale.ROOT);
}
 
Example 13
Source File: HiveColumnCardinalityJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {

    Options options = new Options();

    try {
        options.addOption(OPTION_TABLE);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);

        // start job
        String jobName = JOB_TITLE + getOptionsAsString();
        System.out.println("Starting: " + jobName);
        Configuration conf = getConf();
        job = Job.getInstance(conf, jobName);

        setJobClasspath(job);
        
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        String table = getOptionValue(OPTION_TABLE);
        String[] dbTableNames = HadoopUtil.parseHiveTableName(table);
        HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

        job.setInputFormatClass(HCatInputFormat.class);
        job.setMapperClass(ColumnCardinalityMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(BytesWritable.class);

        // Reducer - only one
        job.setReducerClass(ColumnCardinalityReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        System.out.println("Going to submit HiveColumnCardinalityJob for table '" + table + "'");
        int result = waitForCompletion(job);

        return result;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }

}
 
Example 14
Source File: IIDistinctColumnsJob.java    From Kylin with Apache License 2.0
private void setupMapper() throws IOException {

    String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

    log.info("setting hcat input format, db name {} , table name {}", dbTableNames[0], dbTableNames[1]);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(IIDistinctColumnsMapper.class);
    job.setCombinerClass(IIDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}