Java Code Examples for org.apache.cassandra.hadoop.ColumnFamilyInputFormat

The following examples show how to use org.apache.cassandra.hadoop.ColumnFamilyInputFormat. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: stratio-cassandra   Source File: WordCountCounters.java    License: Apache License 2.0 5 votes vote down vote up
public int run(String[] args) throws Exception
{
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));


    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate().setSlice_range(
                                                                    new SliceRange().
                                                                    setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
 
Example 2
Source Project: stratio-cassandra   Source File: WordCount.java    License: Apache License 2.0 4 votes vote down vote up
public int run(String[] args) throws Exception
{
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR))
    {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
    {
        String columnName = "text" + i;

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem"))
        {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        }
        else
        {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        if (i == 4)
        {
            IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("int4"), IndexOperator.EQ, ByteBufferUtil.bytes(0));
            ConfigHelper.setInputRange(job.getConfiguration(), Arrays.asList(expr));
        }

        if (i == 5)
        {
            // this will cause the predicate to be ignored in favor of scanning everything as a wide row
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);
        }

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

        job.waitForCompletion(true);
    }
    return 0;
}