Java Code Examples for org.apache.cassandra.hadoop.ConfigHelper#setInputPartitioner()

The following examples show how to use org.apache.cassandra.hadoop.ConfigHelper#setInputPartitioner(). Each example is taken from an open-source project; the source file and license are noted above the code.
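
As a quick orientation before the project examples, here is a minimal sketch of the call sequence around setInputPartitioner() for a Thrift-based ColumnFamilyInputFormat job. It assumes a single-node cluster reachable on localhost with the default Murmur3Partitioner; the class name InputPartitionerSketch and the keyspace and column family names ("demo_ks", "demo_cf") are placeholders, not part of any of the projects below.

import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class InputPartitionerSketch
{
    public static Job newInputJob(Configuration conf) throws Exception
    {
        Job job = new Job(conf, "cassandra-input-sketch");
        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        Configuration jobConf = job.getConfiguration();
        // Where and how to reach the cluster (Thrift RPC in these examples).
        ConfigHelper.setInputInitialAddress(jobConf, "localhost");
        ConfigHelper.setInputRpcPort(jobConf, "9160");
        // The partitioner must match the partitioner configured on the cluster
        // (the "partitioner" setting in cassandra.yaml).
        ConfigHelper.setInputPartitioner(jobConf, "org.apache.cassandra.dht.Murmur3Partitioner");
        // Placeholder keyspace / column family names.
        ConfigHelper.setInputColumnFamily(jobConf, "demo_ks", "demo_cf");
        // ColumnFamilyInputFormat also expects a slice predicate describing which
        // columns to read; an empty range reads up to 100 columns per row.
        SlicePredicate predicate = new SlicePredicate().setSlice_range(
                new SliceRange(ByteBufferUtil.EMPTY_BYTE_BUFFER,
                               ByteBufferUtil.EMPTY_BYTE_BUFFER,
                               false, 100));
        ConfigHelper.setInputSlicePredicate(jobConf, predicate);
        return job;
    }
}

Examples 1–3 below follow this same pattern in MapReduce drivers; Examples 4–6 show the same call made from Pig (CqlNativeStorage) and Hive (HiveCassandraStandardColumnInputFormat) integration code.
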
Example 1
Source File: WordCountCounters.java    From stratio-cassandra with Apache License 2.0
public int run(String[] args) throws Exception
{
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));


    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
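    // The partitioner must match the one the target cluster is configured with;
    // here it is passed as the fully-qualified class name.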
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate().setSlice_range(
                                                                    new SliceRange().
                                                                    setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}
 
Example 2
Source File: WordCountCounters.java    From stratio-cassandra with Apache License 2.0
public int run(String[] args) throws Exception
{
    String inputMapperType = "native";
    if (args != null && args.length > 0 && args[0].startsWith(INPUT_MAPPER_VAR))
    {
        String[] arg0 = args[0].split("=");
        if (arg0 != null && arg0.length == 2)
            inputMapperType = arg0[1];
    }
    Job job = new Job(getConf(), "wordcountcounters");

    job.setCombinerClass(ReducerToFilesystem.class);
    job.setReducerClass(ReducerToFilesystem.class);
    job.setJarByClass(WordCountCounters.class); 

    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
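    // The bare class name is used here; Example 1 passes the fully-qualified
    // org.apache.cassandra.dht.Murmur3Partitioner form instead.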
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCount.OUTPUT_COLUMN_FAMILY);

    CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "3");
    if ("native".equals(inputMapperType))
    {
        job.setMapperClass(SumNativeMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        CqlConfigHelper.setInputCql(job.getConfiguration(), "select * from " + WordCount.OUTPUT_COLUMN_FAMILY + " where token(word) > ? and token(word) <= ? allow filtering");
    }
    else
    {
        job.setMapperClass(SumMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
    job.waitForCompletion(true);
    return 0;
}
 
Example 3
Source File: WordCount.java    From stratio-cassandra with Apache License 2.0
public int run(String[] args) throws Exception
{
    String outputReducerType = "filesystem";
    if (args != null && args.length > 0 && args[0].startsWith(OUTPUT_REDUCER_VAR))
    {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
    {
        String columnName = "text" + i;

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem"))
        {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        }
        else
        {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        if (i == 4)
        {
            IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("int4"), IndexOperator.EQ, ByteBufferUtil.bytes(0));
            ConfigHelper.setInputRange(job.getConfiguration(), Arrays.asList(expr));
        }

        if (i == 5)
        {
            // this will cause the predicate to be ignored in favor of scanning everything as a wide row
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);
        }

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

        job.waitForCompletion(true);
    }
    return 0;
}
 
Example 4
Source File: CqlNativeStorage.java    From stratio-cassandra with Apache License 2.0
/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException
{
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null)
    {
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
        CqlConfigHelper.setUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
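    // partitionerClass comes from the location URI (setLocationFromUri above); if the
    // partitioner is still unset after setConnectionInformation(), the check near the
    // end of this method throws, pointing at PIG_INPUT_PARTITIONER / PIG_PARTITIONER.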
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (nativePort != null)
        CqlConfigHelper.setInputNativePort(conf, nativePort);
    if (nativeCoreConnections != null)
        CqlConfigHelper.setInputCoreConnections(conf, nativeCoreConnections);
    if (nativeMaxConnections != null)
        CqlConfigHelper.setInputMaxConnections(conf, nativeMaxConnections);
    if (nativeMinSimultReqs != null)
        CqlConfigHelper.setInputMinSimultReqPerConnections(conf, nativeMinSimultReqs);
    if (nativeMaxSimultReqs != null)
        CqlConfigHelper.setInputMaxSimultReqPerConnections(conf, nativeMaxSimultReqs);
    if (nativeConnectionTimeout != null)
        CqlConfigHelper.setInputNativeConnectionTimeout(conf, nativeConnectionTimeout);
    if (nativeReadConnectionTimeout != null)
        CqlConfigHelper.setInputNativeReadConnectionTimeout(conf, nativeReadConnectionTimeout);
    if (nativeReceiveBufferSize != null)
        CqlConfigHelper.setInputNativeReceiveBufferSize(conf, nativeReceiveBufferSize);
    if (nativeSendBufferSize != null)
        CqlConfigHelper.setInputNativeSendBufferSize(conf, nativeSendBufferSize);
    if (nativeSolinger != null)
        CqlConfigHelper.setInputNativeSolinger(conf, nativeSolinger);
    if (nativeTcpNodelay != null)
        CqlConfigHelper.setInputNativeTcpNodelay(conf, nativeTcpNodelay);
    if (nativeReuseAddress != null)
        CqlConfigHelper.setInputNativeReuseAddress(conf, nativeReuseAddress);
    if (nativeKeepAlive != null)
        CqlConfigHelper.setInputNativeKeepAlive(conf, nativeKeepAlive);
    if (nativeAuthProvider != null)
        CqlConfigHelper.setInputNativeAuthProvider(conf, nativeAuthProvider);
    if (nativeSSLTruststorePath != null)
        CqlConfigHelper.setInputNativeSSLTruststorePath(conf, nativeSSLTruststorePath);
    if (nativeSSLKeystorePath != null)
        CqlConfigHelper.setInputNativeSSLKeystorePath(conf, nativeSSLKeystorePath);
    if (nativeSSLTruststorePassword != null)
        CqlConfigHelper.setInputNativeSSLTruststorePassword(conf, nativeSSLTruststorePassword);
    if (nativeSSLKeystorePassword != null)
        CqlConfigHelper.setInputNativeSSLKeystorePassword(conf, nativeSSLKeystorePassword);
    if (nativeSSLCipherSuites != null)
        CqlConfigHelper.setInputNativeSSLCipherSuites(conf, nativeSSLCipherSuites);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize));
    if (inputCql != null)
        CqlConfigHelper.setInputCql(conf, inputCql);
    if (columns != null)
        CqlConfigHelper.setInputColumns(conf, columns);
    if (whereClause != null)
        CqlConfigHelper.setInputWhereClauses(conf, whereClause);

    String whereClauseForPartitionFilter = getWhereClauseForPartitionFilter();
    String wc = whereClause != null && !whereClause.trim().isEmpty()
                ? (whereClauseForPartitionFilter == null
                       ? whereClause
                       : String.format("%s AND %s", whereClause.trim(), whereClauseForPartitionFilter))
                : whereClauseForPartitionFilter;

    if (wc != null)
    {
        logger.debug("where clause: {}", wc);
        CqlConfigHelper.setInputWhereClauses(conf, wc);
    } 
    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }           
    }

    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}
 
Example 5
Source File: HiveCassandraStandardColumnInputFormat.java    From Hive-Cassandra with Apache License 2.0
@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
    JobConf jobConf, final Reporter reporter) throws IOException {
  HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

  List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
  isTransposed = AbstractColumnSerDe.isTransposed(columns);


  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

  if (columns.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }

  org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
  Job job = new Job(jobConf);

  TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  SlicePredicate predicate = new SlicePredicate();

  if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
    SliceRange range = new SliceRange();
    AbstractType comparator = BytesType.instance;

    String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
    if (comparatorType != null && !comparatorType.equals("")) {
      try {
        comparator = TypeParser.parse(comparatorType);
      } catch (Exception ex) {
        throw new IOException("Comparator class not found.");
      }
    }

    String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
    String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
    String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

    range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
    range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
    range.setReversed(reversed == null ? false : reversed.equals("true"));
    range.setCount(cassandraSplit.getSlicePredicateSize());
    predicate.setSlice_range(range);
  } else {
    int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
    predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
  }


  try {
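    // Copy the details recorded on this split (keyspace, column family, predicate,
    // host, RPC port, partitioner, split size) into the task configuration so the
    // wrapped record reader can connect and page through the data.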
    ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
        cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());

    ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
    ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
    ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
    ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
    ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
    // Set Split Size
    ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

    CassandraHiveRecordReader rr = null;

    if(isTransposed && tac.getConfiguration().getBoolean(AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
      rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
    } else {
      rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
    }
    rr.initialize(cfSplit, tac);

    return rr;

  } catch (Exception ie) {
    throw new IOException(ie);
  }
}
 
Example 6
Source File: HiveCassandraStandardColumnInputFormat.java    From Hive-Cassandra with Apache License 2.0
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  String ks = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  String cf = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
  int slicePredicateSize = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
      AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
  int sliceRangeSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
      AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
  int splitSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
      AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
  String cassandraColumnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
  int rpcPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
  String host = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
  String partitioner = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

  if (cassandraColumnMapping == null) {
    throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
  }

  SliceRange range = new SliceRange();
  range.setStart(new byte[0]);
  range.setFinish(new byte[0]);
  range.setReversed(false);
  range.setCount(slicePredicateSize);
  SlicePredicate predicate = new SlicePredicate();
  predicate.setSlice_range(range);

  ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
  ConfigHelper.setInputInitialAddress(jobConf, host);
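  // The partitioner string read from CASSANDRA_PARTITIONER must match the cluster's
  // partitioner; it is also recorded on each HiveCassandraStandardSplit below.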
  ConfigHelper.setInputPartitioner(jobConf, partitioner);
  ConfigHelper.setInputSlicePredicate(jobConf, predicate);
  ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
  ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
  ConfigHelper.setInputSplitSize(jobConf, splitSize);

  Job job = new Job(jobConf);
  JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());

  Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
  List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
  InputSplit[] results = new InputSplit[splits.size()];

  for (int i = 0; i < splits.size(); ++i) {
    HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
        (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
    csplit.setKeyspace(ks);
    csplit.setColumnFamily(cf);
    csplit.setRangeBatchSize(sliceRangeSize);
    csplit.setSplitSize(splitSize);
    csplit.setHost(host);
    csplit.setPort(rpcPort);
    csplit.setSlicePredicateSize(slicePredicateSize);
    csplit.setPartitioner(partitioner);
    csplit.setColumnMapping(cassandraColumnMapping);
    results[i] = csplit;
  }
  return results;
}