Java Code Examples for org.apache.cassandra.hadoop.ConfigHelper

The following examples show how to use org.apache.cassandra.hadoop.ConfigHelper. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Hands the configuration to the superclass and then mirrors the Titan storage settings into
 * the Hadoop keys that {@code ConfigHelper} manages for Cassandra's input format.
 *
 * @param config the Hadoop configuration to populate
 */
@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Connection: the first configured storage host is always used; port and credentials are optional
    final String firstHost = titanConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0];
    ConfigHelper.setInputInitialAddress(config, firstHost);
    if (titanConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config, String.valueOf(titanConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_USERNAME)) {
        ConfigHelper.setInputKeyspaceUserName(config, titanConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    }
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        ConfigHelper.setInputKeyspacePassword(config, titanConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
    }

    // Keyspace and column family. The four-argument overload is used on purpose: the
    // overload without the widerows flag would force that flag to false.
    final String keyspaceName = titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE);
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    ConfigHelper.setInputColumnFamily(
            config,
            keyspaceName,
            mrConf.get(TitanHadoopConfiguration.COLUMN_FAMILY_NAME),
            wideRows);
    log.debug("Set keyspace: {}", keyspaceName);

    // Column slice bounds come from the default slice query, batched by the configured range size
    // TODO stop slicing the whole row
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    final SlicePredicate slicePredicate = new SlicePredicate();
    slicePredicate.setSlice_range(getSliceRange(TitanHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize));
    ConfigHelper.setInputSlicePredicate(config, slicePredicate);
}
 
Example 2
Source Project: stratio-cassandra   Source File: TestRingCache.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens a framed thrift connection to the given node, stores the resulting client in
 * {@code thriftClient}, and prepares {@code conf} with the output partitioner, the first
 * seed address and the RPC port taken from {@code DatabaseDescriptor}.
 *
 * @param server host name or address of the Cassandra node
 * @param port   thrift port of that node
 * @throws Exception if the connection or any descriptor lookup fails
 */
private void setup(String server, int port) throws Exception
{
    // Build the thrift stack (socket -> framed transport -> binary protocol -> client)
    TSocket rawSocket = new TSocket(server, port);
    System.out.println(" connected to " + server + ":" + port + ".");
    Cassandra.Client client = new Cassandra.Client(new TBinaryProtocol(new TFramedTransport(rawSocket)));
    rawSocket.open();
    // only publish the client once the socket has actually been opened
    thriftClient = client;

    String seedAddress = DatabaseDescriptor.getSeeds().iterator().next().getHostAddress();
    conf = new Configuration();
    ConfigHelper.setOutputPartitioner(conf, DatabaseDescriptor.getPartitioner().getClass().getName());
    ConfigHelper.setOutputInitialAddress(conf, seedAddress);
    ConfigHelper.setOutputRpcPort(conf, String.valueOf(DatabaseDescriptor.getRpcPort()));
}
 
Example 3
Source Project: hdfs2cass   Source File: CrunchCqlBulkRecordWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Lazily creates the SSTable writer and the SSTable loader; each is built only while its
 * field is still {@code null}. Any failure is rethrown as a {@code CrunchRuntimeException}.
 */
private void prepareWriter()  {
  try {
    if (writer == null) {
      // sorted writer for the configured table, using the job's output partitioner
      writer = CQLSSTableWriter.builder()
          .forTable(schema)
          .using(insertStatement)
          .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
          .inDirectory(outputDir)
          .sorted()
          .build();
    }

    if (loader == null) {
      CrunchExternalClient client = new CrunchExternalClient(conf);
      client.addKnownCfs(keyspace, schema);
      BulkRecordWriter.NullOutputHandler quietHandler = new BulkRecordWriter.NullOutputHandler();
      this.loader = new SSTableLoader(outputDir, client, quietHandler);
    }
  } catch (Exception cause) {
    throw new CrunchRuntimeException(cause);
  }
}
 
Example 4
Source Project: hdfs2cass   Source File: CassandraParams.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies these parameters into the job configuration: the mandatory seed host, column family
 * and partitioner, then the optional stream throttle, compression class and RPC port, and
 * finally the job jar.
 *
 * @param conf the job configuration to populate
 */
private void configure(final JobConf conf) {
  // mandatory output settings
  ConfigHelper.setOutputInitialAddress(conf, getSeedNodeHost());
  CrunchConfigHelper.setOutputColumnFamily(conf, getKeyspace(), getColumnFamily());
  ConfigHelper.setOutputPartitioner(conf, getPartitioner());

  // optional settings are written only when a value is present
  if (getStreamThrottleMBits().isPresent()) {
    conf.set(
        "mapreduce.output.bulkoutputformat.streamthrottlembits",
        getStreamThrottleMBits().get().toString());
  }
  if (getCompressionClass().isPresent()) {
    ConfigHelper.setOutputCompressionClass(conf, getCompressionClass().get());
  }
  if (getRpcPort().isPresent()) {
    ConfigHelper.setOutputRpcPort(conf, String.valueOf(getRpcPort().get()));
  }

  conf.setJarByClass(BulkLoader.class);
}
 
Example 5
Source Project: grakn   Source File: InputFormatCQL.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Stores the Hadoop configuration and copies the JanusGraph storage settings (host, port,
 * credentials, keyspace, column family and slice predicate) into the configuration keys
 * used by the Cassandra input format.
 *
 * @param config the Hadoop configuration to populate
 */
@Override
public void setConf(Configuration config) {
    this.hadoopConf = config;
    HadoopPoolsConfigurable.super.setConf(config);
    ModifiableConfigurationHadoop mrConf = ModifiableConfigurationHadoop.of(ModifiableConfigurationHadoop.MAPRED_NS, config);
    BasicConfiguration janusgraphConf = mrConf.getJanusGraphConf();

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config, String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    // Credentials are only forwarded when both are configured and the username is non-blank.
    // The username must come from AUTH_USERNAME and the password from AUTH_PASSWORD.
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME) && janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        String username = janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME);
        if (StringUtils.isNotBlank(username)) {
            config.set(INPUT_NATIVE_AUTH_PROVIDER, PlainTextAuthProvider.class.getName());
            config.set(USERNAME, username);
            config.set(PASSWORD, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));
        }
    }
    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(CQLConfigOptions.KEYSPACE),
                                      mrConf.get(ModifiableConfigurationHadoop.COLUMN_FAMILY_NAME), wideRows);
    LOG.debug("Set keyspace: {}", janusgraphConf.get(CQLConfigOptions.KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    SlicePredicate predicate = new SlicePredicate();
    int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}
 
Example 6
/**
 * Verifies that the input-side settings required to read from Cassandra are present.
 *
 * @param conf the Hadoop configuration to check
 * @throws UnsupportedOperationException if the input keyspace/column family, the input
 *         initial address, or the input partitioner has not been set
 */
private void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and table with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        // this is the *input* address; the message previously said "output", which pointed users at the wrong setting
        throw new UnsupportedOperationException("You must set the initial input address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}
 
Example 7
/**
 * Asks {@code describeSplits} to break one token range into sub-splits, using the split size
 * (in rows and in megabytes) read from the configuration.
 *
 * @param keyspace keyspace of the table being read
 * @param cfName   table (column family) name
 * @param range    token range to subdivide
 * @param conf     Hadoop configuration providing the split sizes
 * @param session  session passed through to {@code describeSplits}
 * @return the value returned by {@code describeSplits}
 * @throws RuntimeException wrapping any exception raised by {@code describeSplits}
 */
private Map<TokenRange, Long> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf, CqlSession session) {
    final int rowsPerSplit = ConfigHelper.getInputSplitSize(conf);
    final int megabytesPerSplit = ConfigHelper.getInputSplitSizeInMb(conf);

    final Map<TokenRange, Long> subSplits;
    try {
        subSplits = describeSplits(keyspace, cfName, range, rowsPerSplit, megabytesPerSplit, session);
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
    return subSplits;
}
 
Example 8
Source Project: stratio-cassandra   Source File: WordCountCounters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the "wordcountcounters" job: reads the counter column family through
 * {@code ColumnFamilyInputFormat} and writes text/long pairs under {@code OUTPUT_PATH_PREFIX}.
 *
 * @param args command-line arguments (not used)
 * @return always 0; the job's completion status is not inspected
 * @throws Exception if the job cannot be created or run
 */
public int run(String[] args) throws Exception
{
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    // input side: thrift connection settings and the column family to scan
    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);

    // unbounded start/finish with a cap of 100 columns per row
    SliceRange firstHundredColumns = new SliceRange()
        .setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER)
        .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER)
        .setCount(100);
    SlicePredicate predicate = new SlicePredicate().setSlice_range(firstHundredColumns);
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    // output side: plain files
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));

    job.waitForCompletion(true);
    return 0;
}
 
Example 9
Source Project: stratio-cassandra   Source File: WordCountCounters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures and runs the "wordcountcounters" job over the word-count output table.
 *
 * <p>The first argument may select the mapper as {@code INPUT_MAPPER_VAR=<type>}; when it is
 * absent or malformed the "native" mapper is used.
 *
 * @param args command-line arguments; may be {@code null} or empty
 * @return always 0; the job's completion status is not inspected
 * @throws Exception if the job cannot be created or run
 */
public int run(String[] args) throws Exception
{
    String inputMapperType = "native";
    // guard against an empty argument list as well as a null one before touching args[0]
    if (args != null && args.length > 0 && args[0].startsWith(INPUT_MAPPER_VAR))
    {
        String[] arg0 = args[0].split("=");
        if (arg0 != null && arg0.length == 2)
            inputMapperType = arg0[1];
    }
    Job job = new Job(getConf(), "wordcountcounters");

    job.setCombinerClass(ReducerToFilesystem.class);
    job.setReducerClass(ReducerToFilesystem.class);
    job.setJarByClass(WordCountCounters.class); 

    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCount.OUTPUT_COLUMN_FAMILY);

    CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "3");
    if ("native".equals(inputMapperType))
    {
        // native mapper: supply an explicit token-range query
        job.setMapperClass(SumNativeMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        CqlConfigHelper.setInputCql(job.getConfiguration(), "select * from " + WordCount.OUTPUT_COLUMN_FAMILY + " where token(word) > ? and token(word) <= ? allow filtering");
    }
    else
    {
        job.setMapperClass(SumMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
    job.waitForCompletion(true);
    return 0;
}
 
Example 10
Source Project: stratio-cassandra   Source File: CqlBulkRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves every setting the writer needs (keyspace, table, schema, insert statement, output
 * directory and the delete-on-success flag) up front, so that a missing value surfaces here
 * rather than later in write().
 *
 * @throws IOException declared for the lookups performed here
 */
private void setConfigs() throws IOException
{
    // target table
    this.keyspace = ConfigHelper.getOutputKeyspace(conf);
    this.columnFamily = ConfigHelper.getOutputColumnFamily(conf);

    // per-table CQL definitions
    this.schema = CqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
    this.insertStatement = CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);

    // local staging directory and cleanup policy
    this.outputDir = getColumnFamilyDirectory();
    this.deleteSrc = CqlBulkOutputFormat.getDeleteSourceOnSuccess(conf);
}
 
Example 11
Source Project: stratio-cassandra   Source File: CqlBulkRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Lazily creates the SSTable writer and the SSTable loader; each is built only while its
 * field is still {@code null}. The loader removes {@code outputDir} after a successful
 * stream when {@code deleteSrc} is set.
 *
 * @throws IOException wrapping any exception raised while building either object
 */
private void prepareWriter() throws IOException
{
    try
    {
        if (writer == null)
        {
            // buffer size falls back to 64 when the setting is absent
            int bufferSizeInMb = Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64"));
            writer = CQLSSTableWriter.builder()
                                     .forTable(schema)
                                     .using(insertStatement)
                                     .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                                     .inDirectory(outputDir)
                                     .withBufferSizeInMB(bufferSizeInMb)
                                     .build();
        }

        if (loader == null)
        {
            ExternalClient client = new ExternalClient(conf);
            client.addKnownCfs(keyspace, schema);

            BulkRecordWriter.NullOutputHandler quietHandler = new BulkRecordWriter.NullOutputHandler();
            this.loader = new SSTableLoader(outputDir, client, quietHandler)
            {
                @Override
                public void onSuccess(StreamState finalState)
                {
                    if (deleteSrc)
                    {
                        FileUtils.deleteRecursive(outputDir);
                    }
                }
            };
        }
    }
    catch (Exception cause)
    {
        throw new IOException(cause);
    }
}
 
Example 12
Source Project: stratio-cassandra   Source File: CqlConfigHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds driver query options from the configuration: the read consistency level is applied
 * when one is configured and non-empty, and the fetch size when a page row size is present.
 *
 * @param conf the Hadoop configuration to read from
 * @return a new {@code QueryOptions} carrying whichever of the two settings were found
 */
private static QueryOptions getReadQueryOptions(Configuration conf)
{
    String consistencyName = ConfigHelper.getReadConsistencyLevel(conf);
    Optional<Integer> pageRowSize = getInputPageRowSize(conf);

    QueryOptions options = new QueryOptions();

    boolean consistencyMissing = consistencyName == null || consistencyName.isEmpty();
    if (!consistencyMissing)
    {
        options.setConsistencyLevel(com.datastax.driver.core.ConsistencyLevel.valueOf(consistencyName));
    }

    if (pageRowSize.isPresent())
    {
        options.setFetchSize(pageRowSize.get());
    }

    return options;
}
 
Example 13
Source Project: stratio-cassandra   Source File: CqlRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the writer and performs one-off schema discovery over a temporary thrift client:
 * selects the output keyspace, logs in when both user and password are configured, loads the
 * partition key validator, and derives the final CQL statement from the configured output query.
 *
 * <p>The temporary client's transport is closed once discovery finishes, whether it succeeded
 * or failed.
 *
 * @param conf the Hadoop configuration describing the output
 * @throws RuntimeException wrapping any failure, including an {@code IllegalArgumentException}
 *         when no client can be obtained and an {@code UnsupportedOperationException} when the
 *         configured statement is an INSERT
 */
CqlRecordWriter(Configuration conf)
{
    super(conf);
    this.clients = new HashMap<>();

    try
    {
        Cassandra.Client client = ConfigHelper.getClientFromOutputAddressList(conf);
        if (client == null)
            throw new IllegalArgumentException("Invalid configuration specified " + conf);

        try
        {
            client.set_keyspace(ConfigHelper.getOutputKeyspace(conf));
            String user = ConfigHelper.getOutputKeyspaceUserName(conf);
            String password = ConfigHelper.getOutputKeyspacePassword(conf);
            if ((user != null) && (password != null))
                AbstractColumnFamilyOutputFormat.login(user, password, client);
            retrievePartitionKeyValidator(client);
            String cqlQuery = CqlConfigHelper.getOutputCql(conf).trim();
            // Locale.ROOT keeps the keyword check independent of the JVM default locale
            // (e.g. Turkish lower-cases 'I' to a dotless 'ı', which would let INSERT through)
            if (cqlQuery.toLowerCase(java.util.Locale.ROOT).startsWith("insert"))
                throw new UnsupportedOperationException("INSERT with CqlRecordWriter is not supported, please use UPDATE/DELETE statement");
            cql = appendKeyWhereClauses(cqlQuery);
        }
        finally
        {
            // the client is only needed for discovery; release it even when discovery fails
            TTransport transport = client.getOutputProtocol().getTransport();
            if (transport.isOpen())
                transport.close();
        }
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }
}
 
Example 14
Source Project: stratio-cassandra   Source File: CqlRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the key validator, key aliases and column aliases of the output table from
 * {@code system.schema_columnfamilies} and stores them in {@code keyValidator},
 * {@code partitionKeyColumns} and {@code clusterColumns}.
 *
 * @param client thrift client used to run the CQL3 query
 * @throws Exception if the query fails or its result cannot be decoded
 */
private void retrievePartitionKeyValidator(Cassandra.Client client) throws Exception
{
    String formatted = String.format("SELECT key_validator," +
                                     "       key_aliases," +
                                     "       column_aliases " +
                                     "FROM system.schema_columnfamilies " +
                                     "WHERE keyspace_name='%s' and columnfamily_name='%s'",
                                     ConfigHelper.getOutputKeyspace(conf),
                                     ConfigHelper.getOutputColumnFamily(conf));
    CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(formatted), Compression.NONE, ConsistencyLevel.ONE);

    // only the first row is consulted; its columns follow the SELECT order
    List<Column> schemaColumns = result.rows.get(0).columns;

    // column 0: key validator class
    Column validatorColumn = schemaColumns.get(0);
    keyValidator = parseType(ByteBufferUtil.string(ByteBuffer.wrap(validatorColumn.getValue())));

    // column 1: JSON list of partition key names
    Column keyAliasColumn = schemaColumns.get(1);
    String keyString = ByteBufferUtil.string(ByteBuffer.wrap(keyAliasColumn.getValue()));
    logger.debug("partition keys: {}", keyString);
    List<String> keys = FBUtilities.fromJsonList(keyString);
    partitionKeyColumns = keys.toArray(new String[keys.size()]);

    // column 2: JSON list of clustering column names
    Column columnAliasColumn = schemaColumns.get(2);
    String clusterColumnString = ByteBufferUtil.string(ByteBuffer.wrap(columnAliasColumn.getValue()));
    logger.debug("cluster columns: {}", clusterColumnString);
    clusterColumns = FBUtilities.fromJsonList(clusterColumnString);
}
 
Example 15
Source Project: stratio-cassandra   Source File: CqlNativeStorage.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies the store settings parsed from the location URI to the job configuration, then
 * checks that an output RPC port, initial address and partitioner ended up configured before
 * initializing the schema.
 *
 * @param location the store location URI
 * @param job      the job whose configuration is populated
 * @throws IOException if the output RPC port, initial address or partitioner is still unset
 */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    // optional settings: each is written only when a value was supplied
    boolean hasCredentials = username != null && password != null;
    if (hasCredentials)
    {
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0)
    {
        ConfigHelper.setInputSplitSize(conf, splitSize);
    }
    if (partitionerClass != null)
    {
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    }
    if (rpcPort != null)
    {
        // the port is mirrored to both the output and the input side
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        // likewise for the initial address
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    // final sanity checks on what ended up in the configuration
    if (ConfigHelper.getOutputRpcPort(conf) == 0)
    {
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    }
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
    {
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    }
    if (ConfigHelper.getOutputPartitioner(conf) == null)
    {
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    }

    initSchema(storeSignature);
}
 
Example 16
Source Project: hdfs2cass   Source File: CrunchBulkRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Lazily prepares the output directory and, while {@code writer} is still {@code null}, the
 * SSTable writer together with the loader that streams its output.
 */
private void prepareWriter() {
  final String cfName = CrunchConfigHelper.getOutputColumnFamily(conf);
  final String ksName = ConfigHelper.getOutputKeyspace(conf);

  if (outputdir == null) {
    // dir must be named by ks/cf for the loader
    outputdir = Paths.get(getOutputLocation(), ksName, cfName).toFile();
    outputdir.mkdirs();
  }

  if (writer != null) {
    return;
  }

  // only super column families get a sub-comparator
  final AbstractType<?> subcomparator = (cfType == CFType.SUPER) ? BytesType.instance : null;

  this.writer = new SSTableSimpleWriter(
      outputdir,
      ConfigHelper.getOutputPartitioner(conf),
      ksName,
      cfName,
      BytesType.instance,
      subcomparator);

  ExternalSSTableLoaderClient loaderClient = new ExternalSSTableLoaderClient(
      ConfigHelper.getOutputInitialAddress(conf),
      ConfigHelper.getOutputRpcPort(conf),
      ConfigHelper.getOutputKeyspaceUserName(conf),
      ConfigHelper.getOutputKeyspacePassword(conf));

  this.loader = new SSTableLoader(outputdir, loaderClient, new OutputHandler.SystemOutput(true, true));
}
 
Example 17
Source Project: hdfs2cass   Source File: CrunchCqlBulkRecordWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves every setting the writer needs (keyspace, table, schema, insert statement and
 * output directory) up front, so that a missing value surfaces here rather than in write().
 */
private void setConfigs()
{
  // target table
  this.keyspace = ConfigHelper.getOutputKeyspace(conf);
  this.columnFamily = CrunchConfigHelper.getOutputColumnFamily(conf);

  // per-table CQL definitions
  this.schema = CrunchCqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
  this.insertStatement = CrunchCqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);

  // local staging directory
  this.outputDir = getColumnFamilyDirectory();
}
 
Example 18
Source Project: hdfs2cass   Source File: CassandraClusterInfo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the column family's metadata can be fetched through an
 * {@code ExternalSSTableLoaderClient} connected to this cluster's host.
 *
 * @param rpcPort port to connect to; when absent, the output RPC port of a fresh
 *                {@code Configuration} is used instead
 * @throws CrunchRuntimeException if no metadata is returned for the column family
 */
public void validateThriftAccessible(final Optional<Integer> rpcPort) {
  Config.setClientMode(true);

  final int fallbackPort = ConfigHelper.getOutputRpcPort(new Configuration());
  final int port = rpcPort.or(fallbackPort);

  final ExternalSSTableLoaderClient loaderClient = new ExternalSSTableLoaderClient(this.host, port, null, null);
  loaderClient.init(this.keyspace);

  final boolean accessible = loaderClient.getCFMetaData(this.keyspace, this.columnFamily) != null;
  if (!accessible) {
    throw new CrunchRuntimeException("Column family not accessible: " + this.keyspace + "." + this.columnFamily);
  }
}
 
Example 19
/**
 * Computes the input splits for a job: validates the configuration, resolves the optional job
 * key range into a token range, submits one {@code SplitCallable} per token range (or per
 * intersection with the job range) to an executor, gathers the results and returns them in
 * shuffled order.
 *
 * @param context the job context supplying the Hadoop configuration
 * @return the collected splits, shuffled
 * @throws IOException if any split computation fails
 */
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    // fail fast when keyspace/table, initial address or partitioner is missing
    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    // NOTE(review): with a core size of 0 and an unbounded LinkedBlockingQueue, ThreadPoolExecutor
    // only adds threads when the queue rejects a task, so this may effectively run on a single
    // thread — confirm against the ThreadPoolExecutor documentation
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    // the session is closed by try-with-resources; the executor is shut down in the finally block
    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        // stays null when the job is not restricted to a key/token range
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                // key-based range: needs an order-preserving partitioner and must not be mixed with tokens
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                // token-based range: parse both ends with the partitioner's token factory
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                        partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);

        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                // restricted job: only the parts of this range that overlap the job range are split
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                // any failure of a single split task aborts the whole computation
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    // randomize the order of the returned splits, seeded from the current nano time
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
 
Example 20
/**
 * Prepares this reader for one split: records the split and the input settings, opens a
 * session against the split's locations, settles on the CQL query to run (the user-supplied
 * one, or a generated one) and creates the row iterator.
 *
 * @param split   the split to read; must be a {@code ColumnFamilySplit}
 * @param context task context supplying the Hadoop configuration
 * @throws IOException declared by the reader contract
 * @throws RuntimeException wrapping any failure while opening the session
 * @throws AssertionError if a custom query is combined with input columns or where clauses
 */
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);

    // a split length of Long.MAX_VALUE falls back to the configured split size
    long splitLength = this.split.getLength();
    if (splitLength < Long.MAX_VALUE) {
        totalRowCount = (int) splitLength;
    } else {
        totalRowCount = ConfigHelper.getInputSplitSize(conf);
    }

    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {
        // connect using the hosts that hold this split
        session = getInputSession(split.getLocations(), conf);
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }

    // remember the serialization protocol negotiated for this session
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // A user-provided CQL query is used as-is, without validation. Without one, a query is
    // built from inputColumns and the where clauses. Supplying both forms is rejected.
    cqlQuery = conf.get(INPUT_CQL);
    boolean hasCustomQuery = StringUtils.isNotEmpty(cqlQuery);
    boolean hasColumnsOrWhere = StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses);
    if (hasCustomQuery && hasColumnsOrWhere) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }
    if (!hasCustomQuery) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
 
Example 21
Source Project: stratio-cassandra   Source File: WordCount.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs one "wordcount" job per test column ({@code WordCountSetup.TEST_COUNT} in total),
 * reading through {@code ColumnFamilyInputFormat} and writing either to the filesystem or to
 * Cassandra.
 *
 * <p>The first argument may select the reducer as {@code OUTPUT_REDUCER_VAR=<type>}; when it
 * is absent or malformed the "filesystem" reducer is used.
 *
 * @param args command-line arguments; may be {@code null} or empty
 * @return always 0; the jobs' completion status is not inspected
 * @throws Exception if a job cannot be created or run
 */
public int run(String[] args) throws Exception
{
    String outputReducerType = "filesystem";
    // guard against an empty argument list as well as a null one before touching args[0]
    if (args != null && args.length > 0 && args[0].startsWith(OUTPUT_REDUCER_VAR))
    {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
    {
        String columnName = "text" + i;

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem"))
        {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // each iteration writes to its own output path
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        }
        else
        {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
        // read only the single column belonging to this iteration
        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        if (i == 4)
        {
            // iteration 4 additionally filters rows with an index expression on "int4" == 0
            IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("int4"), IndexOperator.EQ, ByteBufferUtil.bytes(0));
            ConfigHelper.setInputRange(job.getConfiguration(), Arrays.asList(expr));
        }

        if (i == 5)
        {
            // this will cause the predicate to be ignored in favor of scanning everything as a wide row
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);
        }

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

        job.waitForCompletion(true);
    }
    return 0;
}
 
Example 22
Source Project: stratio-cassandra   Source File: CqlRecordReader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Prepares this reader for one split: records the split and the input settings, creates the
 * cluster and session when no cluster exists yet, settles on the CQL query to run (the
 * user-supplied one, or a generated one) and creates the row iterator.
 *
 * <p>When a cluster is already present the method returns right after recording the settings.
 *
 * @param split   the split to read; must be a {@code ColumnFamilySplit}
 * @param context task context supplying the Hadoop configuration
 * @throws IOException declared by the reader contract
 * @throws RuntimeException if the cluster cannot be created or no session is available
 * @throws AssertionError if a custom query is combined with input columns or where clauses
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);

    // a split length of Long.MAX_VALUE falls back to the configured split size
    long splitLength = this.split.getLength();
    if (splitLength < Long.MAX_VALUE)
        totalRowCount = (int) splitLength;
    else
        totalRowCount = ConfigHelper.getInputSplitSize(conf);

    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        // an existing cluster means there is nothing further to set up
        if (cluster != null)
            return;

        // build the cluster from the hosts that hold this split
        cluster = CqlConfigHelper.getInputCluster(split.getLocations(), conf);
    }
    catch (Exception cause)
    {
        throw new RuntimeException(cause);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // remember the serialization protocol negotiated with the cluster
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // A user-provided CQL query is used as-is, without validation. Without one, a query is
    // built from inputColumns and the where clauses. Supplying both forms is rejected.
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    boolean hasCustomQuery = StringUtils.isNotEmpty(cqlQuery);
    boolean hasColumnsOrWhere = StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses);
    if (hasCustomQuery && hasColumnsOrWhere)
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (!hasCustomQuery)
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
 
Example 23
Source Project: stratio-cassandra   Source File: CqlNativeStorage.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Set read configuration settings.
 *
 * Takes the configuration from {@code job}, parses {@code location} through
 * {@code setLocationFromUri}, and then copies every connection / query option
 * that has been populated on this instance into the configuration via
 * {@code ConfigHelper} and {@code CqlConfigHelper}. Finally validates that an
 * initial address and a partitioner are present and initialises the schema.
 *
 * @param location the location string handed to {@code setLocationFromUri}; also used
 *                 as the load signature when none has been set yet
 * @param job      the job whose configuration is populated
 * @throws IOException if {@code PIG_INPUT_SPLIT_SIZE} is set but is not a number, or if
 *                     the initial address or the partitioner is missing from the configuration
 */
public void setLocation(String location, Job job) throws IOException
{
    conf = job.getConfiguration();
    setLocationFromUri(location);

    // Credentials are only forwarded when both halves are available.
    if (username != null && password != null)
    {
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
        CqlConfigHelper.setUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0)
    {
        ConfigHelper.setInputSplitSize(conf, splitSize);
    }
    if (partitionerClass != null)
    {
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }
    if (rpcPort != null)
    {
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }

    // Native-protocol connection tuning: each option is forwarded only if it was supplied.
    if (nativePort != null)
    {
        CqlConfigHelper.setInputNativePort(conf, nativePort);
    }
    if (nativeCoreConnections != null)
    {
        CqlConfigHelper.setInputCoreConnections(conf, nativeCoreConnections);
    }
    if (nativeMaxConnections != null)
    {
        CqlConfigHelper.setInputMaxConnections(conf, nativeMaxConnections);
    }
    if (nativeMinSimultReqs != null)
    {
        CqlConfigHelper.setInputMinSimultReqPerConnections(conf, nativeMinSimultReqs);
    }
    if (nativeMaxSimultReqs != null)
    {
        CqlConfigHelper.setInputMaxSimultReqPerConnections(conf, nativeMaxSimultReqs);
    }
    if (nativeConnectionTimeout != null)
    {
        CqlConfigHelper.setInputNativeConnectionTimeout(conf, nativeConnectionTimeout);
    }
    if (nativeReadConnectionTimeout != null)
    {
        CqlConfigHelper.setInputNativeReadConnectionTimeout(conf, nativeReadConnectionTimeout);
    }
    if (nativeReceiveBufferSize != null)
    {
        CqlConfigHelper.setInputNativeReceiveBufferSize(conf, nativeReceiveBufferSize);
    }
    if (nativeSendBufferSize != null)
    {
        CqlConfigHelper.setInputNativeSendBufferSize(conf, nativeSendBufferSize);
    }
    if (nativeSolinger != null)
    {
        CqlConfigHelper.setInputNativeSolinger(conf, nativeSolinger);
    }
    if (nativeTcpNodelay != null)
    {
        CqlConfigHelper.setInputNativeTcpNodelay(conf, nativeTcpNodelay);
    }
    if (nativeReuseAddress != null)
    {
        CqlConfigHelper.setInputNativeReuseAddress(conf, nativeReuseAddress);
    }
    if (nativeKeepAlive != null)
    {
        CqlConfigHelper.setInputNativeKeepAlive(conf, nativeKeepAlive);
    }
    if (nativeAuthProvider != null)
    {
        CqlConfigHelper.setInputNativeAuthProvider(conf, nativeAuthProvider);
    }

    // Native-protocol SSL material.
    if (nativeSSLTruststorePath != null)
    {
        CqlConfigHelper.setInputNativeSSLTruststorePath(conf, nativeSSLTruststorePath);
    }
    if (nativeSSLKeystorePath != null)
    {
        CqlConfigHelper.setInputNativeSSLKeystorePath(conf, nativeSSLKeystorePath);
    }
    if (nativeSSLTruststorePassword != null)
    {
        CqlConfigHelper.setInputNativeSSLTruststorePassword(conf, nativeSSLTruststorePassword);
    }
    if (nativeSSLKeystorePassword != null)
    {
        CqlConfigHelper.setInputNativeSSLKeystorePassword(conf, nativeSSLKeystorePassword);
    }
    if (nativeSSLCipherSuites != null)
    {
        CqlConfigHelper.setInputNativeSSLCipherSuites(conf, nativeSSLCipherSuites);
    }

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize));
    if (inputCql != null)
    {
        CqlConfigHelper.setInputCql(conf, inputCql);
    }
    if (columns != null)
    {
        CqlConfigHelper.setInputColumns(conf, columns);
    }
    if (whereClause != null)
    {
        CqlConfigHelper.setInputWhereClauses(conf, whereClause);
    }

    // Merge the user-supplied where clause with the partition filter (if any):
    //   - no usable user clause  -> partition filter alone (possibly null)
    //   - no partition filter    -> user clause as given
    //   - both                   -> "<trimmed user clause> AND <partition filter>"
    final String partitionFilter = getWhereClauseForPartitionFilter();
    final boolean hasUserWhere = whereClause != null && !whereClause.trim().isEmpty();
    final String effectiveWhere;
    if (!hasUserWhere)
    {
        effectiveWhere = partitionFilter;
    }
    else if (partitionFilter == null)
    {
        effectiveWhere = whereClause;
    }
    else
    {
        effectiveWhere = String.format("%s AND %s", whereClause.trim(), partitionFilter);
    }

    if (effectiveWhere != null)
    {
        logger.debug("where clause: {}", effectiveWhere);
        CqlConfigHelper.setInputWhereClauses(conf, effectiveWhere);
    }

    // The environment variable, when present, overrides any split size set above.
    final String envSplitSize = System.getenv(PIG_INPUT_SPLIT_SIZE);
    if (envSplitSize != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(envSplitSize));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }
    }

    // Fail fast when the mandatory connection settings never made it into the configuration.
    if (ConfigHelper.getInputInitialAddress(conf) == null)
    {
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null)
    {
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    }
    if (loadSignature == null)
    {
        loadSignature = location;
    }

    initSchema(loadSignature);
}
 
Example 24
Source Project: stratio-cassandra   Source File: RingCache.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a ring cache bound to the given configuration.
 *
 * Stores {@code conf}, reads the output partitioner from it through
 * {@code ConfigHelper.getOutputPartitioner}, and then calls
 * {@code refreshEndpointMap()}.
 *
 * @param conf configuration the output partitioner is read from
 */
public RingCache(Configuration conf)
{
    this.conf = conf;
    this.partitioner = ConfigHelper.getOutputPartitioner(conf);
    // NOTE(review): presumably loads the endpoint map using the fields set above;
    // the implementation is not visible here, so keep this call last.
    refreshEndpointMap();
}
 
Example 25
Source Project: stratio-cassandra   Source File: TestRingCache.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the test harness for the given keyspace.
 *
 * Registers {@code keyspace} with the fixed column family {@code "Standard1"} as the
 * output target on {@code conf}, then builds the {@code RingCache} from that configuration.
 *
 * @param keyspace keyspace to use as the output keyspace
 */
public TestRingCache(String keyspace)
{
    // The output column family must be configured before the cache is constructed from conf.
    ConfigHelper.setOutputColumnFamily(conf, keyspace, "Standard1");
    ringCache = new RingCache(conf);
}
 
Example 26
/**
 * Creates the record reader for one Hive Cassandra split.
 *
 * Builds a {@code SlicePredicate} (a slice range when the mapping is transposed or all /
 * no columns are projected, otherwise an explicit column-name list), copies the split's
 * connection settings into a task-attempt configuration via {@code ConfigHelper}, and
 * initialises a {@code CassandraHiveRecordReader} over the underlying column-family split.
 *
 * @param split    the split to read; must be a {@code HiveCassandraStandardSplit}
 * @param jobConf  job configuration providing the projected column ids and slice settings
 * @param reporter progress reporter that the task-attempt context forwards to
 * @return an initialised record reader for the split
 * @throws IOException if more columns are requested than the mapping contains, if the
 *                     configured comparator cannot be parsed, or if configuring or
 *                     initialising the reader fails
 */
@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
    JobConf jobConf, final Reporter reporter) throws IOException {
  HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

  List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
  isTransposed = AbstractColumnSerDe.isTransposed(columns);

  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

  if (columns.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }

  org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
  Job job = new Job(jobConf);

  // Task-attempt context whose progress() is forwarded to the supplied reporter.
  TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  SlicePredicate predicate = new SlicePredicate();

  if (isTransposed || readColIDs.size() == columns.size() || readColIDs.isEmpty()) {
    // Whole-row read: describe the columns with a slice range.
    SliceRange range = new SliceRange();
    AbstractType comparator = BytesType.instance;

    String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
    if (comparatorType != null && !comparatorType.isEmpty()) {
      try {
        comparator = TypeParser.parse(comparatorType);
      } catch (Exception ex) {
        // Keep the original failure as the cause so the real parse error is not lost.
        throw new IOException("Comparator class not found.", ex);
      }
    }

    String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
    String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
    String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

    // Unset bounds are passed to the comparator as the empty string.
    range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
    range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
    // Null-safe: an unset value counts as "not reversed".
    range.setReversed("true".equals(reversed));
    range.setCount(cassandraSplit.getSlicePredicateSize());
    predicate.setSlice_range(range);
  } else {
    // Partial projection: request only the named columns.
    int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
    predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
  }

  try {
    ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
        cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());

    ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
    ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
    ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
    ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
    ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
    // Set Split Size
    ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

    // The wide-row reader is used only for transposed mappings, and only while the
    // wide-row iterator flag (default true) is enabled.
    CassandraHiveRecordReader rr;
    if (isTransposed && tac.getConfiguration().getBoolean(AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
      rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
    } else {
      rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
    }
    rr.initialize(cfSplit, tac);

    return rr;

  } catch (Exception ie) {
    throw new IOException(ie);
  }
}
 
Example 27
/**
 * Computes the input splits for a Cassandra-backed Hive table.
 *
 * Reads the Cassandra settings from {@code jobConf}, writes them back through
 * {@code ConfigHelper}, obtains the splits from {@code getSplits(JobContext)}, and
 * wraps each one in a {@code HiveCassandraStandardSplit} carrying those same settings.
 *
 * @param jobConf   job configuration holding the Cassandra table properties
 * @param numSplits not used by this implementation
 * @return one {@code HiveCassandraStandardSplit} per underlying split
 * @throws IOException if no column mapping is configured
 */
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  final String keyspaceName = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  final String columnFamilyName = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
  final int predicateSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
      AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
  final int rangeBatchSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
      AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
  final int inputSplitSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
      AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
  final String columnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
  final int thriftPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
  final String initialHost = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
  final String partitionerName = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

  if (columnMapping == null) {
    throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
  }

  // Slice range with empty start and finish, limited to predicateSize columns.
  final SliceRange fullRange = new SliceRange();
  fullRange.setStart(new byte[0]);
  fullRange.setFinish(new byte[0]);
  fullRange.setReversed(false);
  fullRange.setCount(predicateSize);
  final SlicePredicate slicePredicate = new SlicePredicate();
  slicePredicate.setSlice_range(fullRange);

  // Publish the settings on the job configuration before the splits are computed from it.
  ConfigHelper.setInputRpcPort(jobConf, String.valueOf(thriftPort));
  ConfigHelper.setInputInitialAddress(jobConf, initialHost);
  ConfigHelper.setInputPartitioner(jobConf, partitionerName);
  ConfigHelper.setInputSlicePredicate(jobConf, slicePredicate);
  ConfigHelper.setInputColumnFamily(jobConf, keyspaceName, columnFamilyName);
  ConfigHelper.setRangeBatchSize(jobConf, rangeBatchSize);
  ConfigHelper.setInputSplitSize(jobConf, inputSplitSize);

  final Job job = new Job(jobConf);
  final JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());

  final Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
  final List<org.apache.hadoop.mapreduce.InputSplit> rawSplits = getSplits(jobContext);
  final InputSplit[] wrapped = new InputSplit[rawSplits.size()];

  int slot = 0;
  for (org.apache.hadoop.mapreduce.InputSplit raw : rawSplits) {
    // Only the first table path is attached to each split.
    final HiveCassandraStandardSplit hiveSplit = new HiveCassandraStandardSplit(
        (ColumnFamilySplit) raw, columnMapping, tablePaths[0]);
    hiveSplit.setKeyspace(keyspaceName);
    hiveSplit.setColumnFamily(columnFamilyName);
    hiveSplit.setRangeBatchSize(rangeBatchSize);
    hiveSplit.setSplitSize(inputSplitSize);
    hiveSplit.setHost(initialHost);
    hiveSplit.setPort(thriftPort);
    hiveSplit.setSlicePredicateSize(predicateSize);
    hiveSplit.setPartitioner(partitionerName);
    hiveSplit.setColumnMapping(columnMapping);
    wrapped[slot++] = hiveSplit;
  }
  return wrapped;
}
 
Example 28
Source Project: hdfs2cass   Source File: CrunchConfigHelper.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Set the keyspace and column family for the output of this job.
 * <p>
 * Use this instead of
 * {@link org.apache.cassandra.hadoop.ConfigHelper#setOutputColumnFamily(org.apache.hadoop.conf.Configuration, String, String)}.
 * The keyspace is written through {@code ConfigHelper.setOutputKeyspace}; the column
 * family is written through this class's two-argument {@code setOutputColumnFamily} overload.
 * </p>
 *
 * @param conf         job configuration to update
 * @param keyspace     output keyspace name
 * @param columnFamily output column family name
 */
public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
  ConfigHelper.setOutputKeyspace(conf, keyspace);
  setOutputColumnFamily(conf, columnFamily);
}