org.apache.cassandra.hadoop.ConfigHelper Java Exaples

Source File: TestRingCache.java From stratio-cassandra with Apache License 2.0

6 votes

private void setup(String server, int port) throws Exception
{
    /* Establish a thrift connection to the cassandra instance */
    TSocket socket = new TSocket(server, port);
    System.out.println(" connected to " + server + ":" + port + ".");
    TBinaryProtocol binaryProtocol = new TBinaryProtocol(new TFramedTransport(socket));
    Cassandra.Client cassandraClient = new Cassandra.Client(binaryProtocol);
    socket.open();
    thriftClient = cassandraClient;
    String seed = DatabaseDescriptor.getSeeds().iterator().next().getHostAddress();
    conf = new Configuration();
    ConfigHelper.setOutputPartitioner(conf, DatabaseDescriptor.getPartitioner().getClass().getName());
    ConfigHelper.setOutputInitialAddress(conf, seed);
    ConfigHelper.setOutputRpcPort(conf, Integer.toString(DatabaseDescriptor.getRpcPort()));

}

Source File: CassandraParams.java From hdfs2cass with Apache License 2.0

6 votes

private void configure(final JobConf conf) {
  ConfigHelper.setOutputInitialAddress(conf, this.getSeedNodeHost());
  CrunchConfigHelper.setOutputColumnFamily(conf, this.getKeyspace(), this.getColumnFamily());
  ConfigHelper.setOutputPartitioner(conf, this.getPartitioner());

  if (this.getStreamThrottleMBits().isPresent()) {
    conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits",
        this.getStreamThrottleMBits().get().toString());
  }

  if (this.getCompressionClass().isPresent()) {
    ConfigHelper.setOutputCompressionClass(conf, this.getCompressionClass().get());
  }

  if (this.getRpcPort().isPresent()) {
    ConfigHelper.setOutputRpcPort(conf, String.valueOf(this.getRpcPort().get()));
  }

  conf.setJarByClass(BulkLoader.class);
}

Source File: CassandraBinaryInputFormat.java From titan1withtp3.1 with Apache License 2.0

6 votes

@Override
public void setConf(final Configuration config) {
    super.setConf(config);

    // Copy some Titan configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, titanConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (titanConf.has(GraphDatabaseConfiguration.STORAGE_PORT))
        ConfigHelper.setInputRpcPort(config, String.valueOf(titanConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_USERNAME))
        ConfigHelper.setInputKeyspaceUserName(config, titanConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
    if (titanConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD))
        ConfigHelper.setInputKeyspacePassword(config, titanConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD));

    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    final boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config, titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE),
            mrConf.get(TitanHadoopConfiguration.COLUMN_FAMILY_NAME), wideRows);
    log.debug("Set keyspace: {}", titanConf.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));

    // Set the column slice bounds via Faunus's vertex query filter
    final SlicePredicate predicate = new SlicePredicate();
    final int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(TitanHadoopSetupCommon.DEFAULT_SLICE_QUERY, rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}

Source File: CrunchCqlBulkRecordWriter.java From hdfs2cass with Apache License 2.0

6 votes

private void prepareWriter()  {
  try {
    if (writer == null) {
      writer = CQLSSTableWriter.builder()
          .forTable(schema)
          .using(insertStatement)
          .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
          .inDirectory(outputDir)
          .sorted()
          .build();
    }
    if (loader == null) {
      CrunchExternalClient externalClient = new CrunchExternalClient(conf);
      externalClient.addKnownCfs(keyspace, schema);
      this.loader = new SSTableLoader(outputDir, externalClient,
          new BulkRecordWriter.NullOutputHandler());
    }
  } catch (Exception e) {
    throw new CrunchRuntimeException(e);
  }
}

Source File: CqlBulkRecordWriter.java From stratio-cassandra with Apache License 2.0

5 votes

private void prepareWriter() throws IOException
{
    try
    {
        if (writer == null)
        {
            writer = CQLSSTableWriter.builder()
                .forTable(schema)
                .using(insertStatement)
                .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                .inDirectory(outputDir)
                .withBufferSizeInMB(Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64")))
                .build();
        }
        if (loader == null)
        {
            ExternalClient externalClient = new ExternalClient(conf);
            
            externalClient.addKnownCfs(keyspace, schema);

            this.loader = new SSTableLoader(outputDir, externalClient, new BulkRecordWriter.NullOutputHandler()) {
                @Override
                public void onSuccess(StreamState finalState)
                {
                    if (deleteSrc)
                        FileUtils.deleteRecursive(outputDir);
                }
            };
        }
    }
    catch (Exception e)
    {
        throw new IOException(e);
    }      
}

Source File: CrunchBulkRecordWriter.java From hdfs2cass with Apache License 2.0

5 votes

private void prepareWriter() {
  String columnFamily = CrunchConfigHelper.getOutputColumnFamily(conf);
  String keyspace = ConfigHelper.getOutputKeyspace(conf);

  if (outputdir == null) {
    // dir must be named by ks/cf for the loader
    outputdir = Paths.get(getOutputLocation(), keyspace, columnFamily).toFile();
    outputdir.mkdirs();
  }

  if (writer == null) {
    AbstractType<?> subcomparator = null;

    if (cfType == CFType.SUPER)
      subcomparator = BytesType.instance;

    this.writer = new SSTableSimpleWriter(
        outputdir, ConfigHelper.getOutputPartitioner(conf),
        keyspace, columnFamily,
        BytesType.instance, subcomparator);

    ExternalSSTableLoaderClient externalClient = new ExternalSSTableLoaderClient(
        ConfigHelper.getOutputInitialAddress(conf),
        ConfigHelper.getOutputRpcPort(conf),
        ConfigHelper.getOutputKeyspaceUserName(conf),
        ConfigHelper.getOutputKeyspacePassword(conf));

    this.loader = new SSTableLoader(outputdir, externalClient,
        new OutputHandler.SystemOutput(true, true));
  }
}

Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0

5 votes

/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass!= null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    initSchema(storeSignature);
}

Source File: CrunchCqlBulkRecordWriter.java From hdfs2cass with Apache License 2.0

5 votes

private void setConfigs()
{
  // if anything is missing, exceptions will be thrown here, instead of on write()
  keyspace = ConfigHelper.getOutputKeyspace(conf);
  columnFamily = CrunchConfigHelper.getOutputColumnFamily(conf);
  schema = CrunchCqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
  insertStatement = CrunchCqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);
  outputDir = getColumnFamilyDirectory();
}

Source File: InputFormatCQL.java From grakn with GNU Affero General Public License v3.0

5 votes

@Override
public void setConf(Configuration config) {
    this.hadoopConf = config;
    HadoopPoolsConfigurable.super.setConf(config);
    ModifiableConfigurationHadoop mrConf = ModifiableConfigurationHadoop.of(ModifiableConfigurationHadoop.MAPRED_NS, config);
    BasicConfiguration janusgraphConf = mrConf.getJanusGraphConf();

    // Copy some JanusGraph configuration keys to the Hadoop Configuration keys used by Cassandra's ColumnFamilyInputFormat
    ConfigHelper.setInputInitialAddress(config, janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_HOSTS)[0]);
    if (janusgraphConf.has(GraphDatabaseConfiguration.STORAGE_PORT)) {
        ConfigHelper.setInputRpcPort(config, String.valueOf(janusgraphConf.get(GraphDatabaseConfiguration.STORAGE_PORT)));
    }
    if (janusgraphConf.has(GraphDatabaseConfiguration.AUTH_USERNAME) && janusgraphConf.has(GraphDatabaseConfiguration.AUTH_PASSWORD)) {
        String username = janusgraphConf.get(GraphDatabaseConfiguration.AUTH_PASSWORD);
        if (StringUtils.isNotBlank(username)) {
            config.set(INPUT_NATIVE_AUTH_PROVIDER, PlainTextAuthProvider.class.getName());
            config.set(USERNAME, username);
            config.set(PASSWORD, janusgraphConf.get(GraphDatabaseConfiguration.AUTH_USERNAME));
        }
    }
    // Copy keyspace, force the CF setting to edgestore, honor widerows when set
    boolean wideRows = config.getBoolean(INPUT_WIDEROWS_CONFIG, false);
    // Use the setInputColumnFamily overload that includes a widerows argument; using the overload without this argument forces it false
    ConfigHelper.setInputColumnFamily(config, janusgraphConf.get(CQLConfigOptions.KEYSPACE),
                                      mrConf.get(ModifiableConfigurationHadoop.COLUMN_FAMILY_NAME), wideRows);
    LOG.debug("Set keyspace: {}", janusgraphConf.get(CQLConfigOptions.KEYSPACE));

    // Set the column slice bounds via Faunus' vertex query filter
    SlicePredicate predicate = new SlicePredicate();
    int rangeBatchSize = config.getInt(RANGE_BATCH_SIZE_CONFIG, Integer.MAX_VALUE);
    predicate.setSlice_range(getSliceRange(rangeBatchSize)); // TODO stop slicing the whole row
    ConfigHelper.setInputSlicePredicate(config, predicate);
}

Source File: CqlRecordWriter.java From stratio-cassandra with Apache License 2.0

5 votes

/** retrieve the key validator from system.schema_columnfamilies table */
private void retrievePartitionKeyValidator(Cassandra.Client client) throws Exception
{
    String keyspace = ConfigHelper.getOutputKeyspace(conf);
    String cfName = ConfigHelper.getOutputColumnFamily(conf);
    String query = "SELECT key_validator," +
    		       "       key_aliases," +
    		       "       column_aliases " +
                   "FROM system.schema_columnfamilies " +
                   "WHERE keyspace_name='%s' and columnfamily_name='%s'";
    String formatted = String.format(query, keyspace, cfName);
    CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(formatted), Compression.NONE, ConsistencyLevel.ONE);

    Column rawKeyValidator = result.rows.get(0).columns.get(0);
    String validator = ByteBufferUtil.string(ByteBuffer.wrap(rawKeyValidator.getValue()));
    keyValidator = parseType(validator);
    
    Column rawPartitionKeys = result.rows.get(0).columns.get(1);
    String keyString = ByteBufferUtil.string(ByteBuffer.wrap(rawPartitionKeys.getValue()));
    logger.debug("partition keys: {}", keyString);

    List<String> keys = FBUtilities.fromJsonList(keyString);
    partitionKeyColumns = new String[keys.size()];
    int i = 0;
    for (String key : keys)
    {
        partitionKeyColumns[i] = key;
        i++;
    }

    Column rawClusterColumns = result.rows.get(0).columns.get(2);
    String clusterColumnString = ByteBufferUtil.string(ByteBuffer.wrap(rawClusterColumns.getValue()));

    logger.debug("cluster columns: {}", clusterColumnString);
    clusterColumns = FBUtilities.fromJsonList(clusterColumnString);
}

Source File: CqlRecordWriter.java From stratio-cassandra with Apache License 2.0

5 votes

CqlRecordWriter(Configuration conf)
{
    super(conf);
    this.clients = new HashMap<>();

    try
    {
        Cassandra.Client client = ConfigHelper.getClientFromOutputAddressList(conf);
        if (client != null)
        {
            client.set_keyspace(ConfigHelper.getOutputKeyspace(conf));
            String user = ConfigHelper.getOutputKeyspaceUserName(conf);
            String password = ConfigHelper.getOutputKeyspacePassword(conf);
            if ((user != null) && (password != null))
                AbstractColumnFamilyOutputFormat.login(user, password, client);
            retrievePartitionKeyValidator(client);
            String cqlQuery = CqlConfigHelper.getOutputCql(conf).trim();
            if (cqlQuery.toLowerCase().startsWith("insert"))
                throw new UnsupportedOperationException("INSERT with CqlRecordWriter is not supported, please use UPDATE/DELETE statement");
            cql = appendKeyWhereClauses(cqlQuery);

            TTransport transport = client.getOutputProtocol().getTransport();
            if (transport.isOpen())
                transport.close();
        }
        else
        {
            throw new IllegalArgumentException("Invalid configuration specified " + conf);
        }
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }
}

Source File: CqlConfigHelper.java From stratio-cassandra with Apache License 2.0

5 votes

private static QueryOptions getReadQueryOptions(Configuration conf)
{
    String CL = ConfigHelper.getReadConsistencyLevel(conf);
    Optional<Integer> fetchSize = getInputPageRowSize(conf);
    QueryOptions queryOptions = new QueryOptions();
    if (CL != null && !CL.isEmpty())
        queryOptions.setConsistencyLevel(com.datastax.driver.core.ConsistencyLevel.valueOf(CL));

    if (fetchSize.isPresent())
        queryOptions.setFetchSize(fetchSize.get());
    return queryOptions;
}

Source File: CqlBulkRecordWriter.java From stratio-cassandra with Apache License 2.0

5 votes

private void setConfigs() throws IOException
{
    // if anything is missing, exceptions will be thrown here, instead of on write()
    keyspace = ConfigHelper.getOutputKeyspace(conf);
    columnFamily = ConfigHelper.getOutputColumnFamily(conf);
    schema = CqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
    insertStatement = CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);
    outputDir = getColumnFamilyDirectory();
    deleteSrc = CqlBulkOutputFormat.getDeleteSourceOnSuccess(conf);
}

Source File: WordCountCounters.java From stratio-cassandra with Apache License 2.0

5 votes

public int run(String[] args) throws Exception
{
    String inputMapperType = "native";
    if (args != null && args[0].startsWith(INPUT_MAPPER_VAR))
    {
        String[] arg0 = args[0].split("=");
        if (arg0 != null && arg0.length == 2)
            inputMapperType = arg0[1];
    }
    Job job = new Job(getConf(), "wordcountcounters");

    job.setCombinerClass(ReducerToFilesystem.class);
    job.setReducerClass(ReducerToFilesystem.class);
    job.setJarByClass(WordCountCounters.class); 

    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCount.OUTPUT_COLUMN_FAMILY);

    CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "3");
    if ("native".equals(inputMapperType))
    {
        job.setMapperClass(SumNativeMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        CqlConfigHelper.setInputCql(job.getConfiguration(), "select * from " + WordCount.OUTPUT_COLUMN_FAMILY + " where token(word) > ? and token(word) <= ? allow filtering");
    }
    else
    {
        job.setMapperClass(SumMapper.class);
        job.setInputFormatClass(CqlInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
    job.waitForCompletion(true);
    return 0;
}

Source File: WordCountCounters.java From stratio-cassandra with Apache License 2.0

5 votes

public int run(String[] args) throws Exception
{
    Job job = new Job(getConf(), "wordcountcounters");
    job.setJarByClass(WordCountCounters.class);
    job.setMapperClass(SumMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));


    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate().setSlice_range(
                                                                    new SliceRange().
                                                                    setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).
                                                                    setCount(100));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);
    return 0;
}

Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0

5 votes

private Map<TokenRange, Long> getSubSplits(String keyspace, String cfName, TokenRange range, Configuration conf, CqlSession session) {
    int splitSize = ConfigHelper.getInputSplitSize(conf);
    int splitSizeMb = ConfigHelper.getInputSplitSizeInMb(conf);
    try {
        return describeSplits(keyspace, cfName, range, splitSize, splitSizeMb, session);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

Source File: CassandraClusterInfo.java From hdfs2cass with Apache License 2.0

5 votes

public void validateThriftAccessible(final Optional<Integer> rpcPort) {
  Config.setClientMode(true);

  int port = rpcPort.or(ConfigHelper.getOutputRpcPort(new Configuration()));

  ExternalSSTableLoaderClient client = new ExternalSSTableLoaderClient(this.host, port, null, null);
  client.init(this.keyspace);
  if (client.getCFMetaData(this.keyspace, this.columnFamily) == null) {
    throw new CrunchRuntimeException("Column family not accessible: " + this.keyspace + "." + this.columnFamily);
  }
}

Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0

5 votes

private void validateConfiguration(Configuration conf) {
    if (ConfigHelper.getInputKeyspace(conf) == null || ConfigHelper.getInputColumnFamily(conf) == null) {
        throw new UnsupportedOperationException("you must set the keyspace and table with setInputColumnFamily()");
    }
    if (ConfigHelper.getInputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node with setInputInitialAddress");
    }
    if (ConfigHelper.getInputPartitioner(conf) == null) {
        throw new UnsupportedOperationException("You must set the Cassandra partitioner class with setInputPartitioner");
    }
}

Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0

4 votes

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
      throw new RuntimeException("Can't create connection session");

    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) ||
                                             StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}

Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0

4 votes

@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  String ks = jobConf.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  String cf = jobConf.get(AbstractColumnSerDe.CASSANDRA_CF_NAME);
  int slicePredicateSize = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
      AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE);
  int sliceRangeSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
      AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE);
  int splitSize = jobConf.getInt(
      AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
      AbstractColumnSerDe.DEFAULT_SPLIT_SIZE);
  String cassandraColumnMapping = jobConf.get(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
  int rpcPort = jobConf.getInt(AbstractColumnSerDe.CASSANDRA_PORT, 9160);
  String host = jobConf.get(AbstractColumnSerDe.CASSANDRA_HOST);
  String partitioner = jobConf.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER);

  if (cassandraColumnMapping == null) {
    throw new IOException("cassandra.columns.mapping required for Cassandra Table.");
  }

  SliceRange range = new SliceRange();
  range.setStart(new byte[0]);
  range.setFinish(new byte[0]);
  range.setReversed(false);
  range.setCount(slicePredicateSize);
  SlicePredicate predicate = new SlicePredicate();
  predicate.setSlice_range(range);

  ConfigHelper.setInputRpcPort(jobConf, "" + rpcPort);
  ConfigHelper.setInputInitialAddress(jobConf, host);
  ConfigHelper.setInputPartitioner(jobConf, partitioner);
  ConfigHelper.setInputSlicePredicate(jobConf, predicate);
  ConfigHelper.setInputColumnFamily(jobConf, ks, cf);
  ConfigHelper.setRangeBatchSize(jobConf, sliceRangeSize);
  ConfigHelper.setInputSplitSize(jobConf, splitSize);

  Job job = new Job(jobConf);
  JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());

  Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
  List<org.apache.hadoop.mapreduce.InputSplit> splits = getSplits(jobContext);
  InputSplit[] results = new InputSplit[splits.size()];

  for (int i = 0; i < splits.size(); ++i) {
    HiveCassandraStandardSplit csplit = new HiveCassandraStandardSplit(
        (ColumnFamilySplit) splits.get(i), cassandraColumnMapping, tablePaths[0]);
    csplit.setKeyspace(ks);
    csplit.setColumnFamily(cf);
    csplit.setRangeBatchSize(sliceRangeSize);
    csplit.setSplitSize(splitSize);
    csplit.setHost(host);
    csplit.setPort(rpcPort);
    csplit.setSlicePredicateSize(slicePredicateSize);
    csplit.setPartitioner(partitioner);
    csplit.setColumnMapping(cassandraColumnMapping);
    results[i] = csplit;
  }
  return results;
}

Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0

4 votes

@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
    JobConf jobConf, final Reporter reporter) throws IOException {
  HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

  List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
  isTransposed = AbstractColumnSerDe.isTransposed(columns);


  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

  if (columns.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }

  org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
  Job job = new Job(jobConf);

  TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  SlicePredicate predicate = new SlicePredicate();

  if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
    SliceRange range = new SliceRange();
    AbstractType comparator = BytesType.instance;

    String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
    if (comparatorType != null && !comparatorType.equals("")) {
      try {
        comparator = TypeParser.parse(comparatorType);
      } catch (Exception ex) {
        throw new IOException("Comparator class not found.");
      }
    }

    String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
    String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
    String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

    range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
    range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
    range.setReversed(reversed == null ? false : reversed.equals("true"));
    range.setCount(cassandraSplit.getSlicePredicateSize());
    predicate.setSlice_range(range);
  } else {
    int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
    predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
  }


  try {
    ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
        cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());

    ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
    ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
    ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
    ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
    ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
    // Set Split Size
    ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

    CassandraHiveRecordReader rr = null;

    if(isTransposed && tac.getConfiguration().getBoolean(AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
      rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
    } else {
      rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
    }
    rr.initialize(cfSplit, tac);

    return rr;

  } catch (Exception ie) {
    throw new IOException(ie);
  }
}

Source File: TestRingCache.java From stratio-cassandra with Apache License 2.0

4 votes

public TestRingCache(String keyspace)
{
    ConfigHelper.setOutputColumnFamily(conf, keyspace, "Standard1");
	ringCache = new RingCache(conf);
}

Source File: RingCache.java From stratio-cassandra with Apache License 2.0

4 votes

public RingCache(Configuration conf)
{
    this.conf = conf;
    this.partitioner = ConfigHelper.getOutputPartitioner(conf);
    refreshEndpointMap();
}

Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0

4 votes

/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException
{
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null)
    {
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
        CqlConfigHelper.setUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass!= null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (nativePort != null)
        CqlConfigHelper.setInputNativePort(conf, nativePort);
    if (nativeCoreConnections != null)
        CqlConfigHelper.setInputCoreConnections(conf, nativeCoreConnections);
    if (nativeMaxConnections != null)
        CqlConfigHelper.setInputMaxConnections(conf, nativeMaxConnections);
    if (nativeMinSimultReqs != null)
        CqlConfigHelper.setInputMinSimultReqPerConnections(conf, nativeMinSimultReqs);
    if (nativeMaxSimultReqs != null)
        CqlConfigHelper.setInputMaxSimultReqPerConnections(conf, nativeMaxSimultReqs);
    if (nativeConnectionTimeout != null)
        CqlConfigHelper.setInputNativeConnectionTimeout(conf, nativeConnectionTimeout);
    if (nativeReadConnectionTimeout != null)
        CqlConfigHelper.setInputNativeReadConnectionTimeout(conf, nativeReadConnectionTimeout);
    if (nativeReceiveBufferSize != null)
        CqlConfigHelper.setInputNativeReceiveBufferSize(conf, nativeReceiveBufferSize);
    if (nativeSendBufferSize != null)
        CqlConfigHelper.setInputNativeSendBufferSize(conf, nativeSendBufferSize);
    if (nativeSolinger != null)
        CqlConfigHelper.setInputNativeSolinger(conf, nativeSolinger);
    if (nativeTcpNodelay != null)
        CqlConfigHelper.setInputNativeTcpNodelay(conf, nativeTcpNodelay);
    if (nativeReuseAddress != null)
        CqlConfigHelper.setInputNativeReuseAddress(conf, nativeReuseAddress);
    if (nativeKeepAlive != null)
        CqlConfigHelper.setInputNativeKeepAlive(conf, nativeKeepAlive);
    if (nativeAuthProvider != null)
        CqlConfigHelper.setInputNativeAuthProvider(conf, nativeAuthProvider);
    if (nativeSSLTruststorePath != null)
        CqlConfigHelper.setInputNativeSSLTruststorePath(conf, nativeSSLTruststorePath);
    if (nativeSSLKeystorePath != null)
        CqlConfigHelper.setInputNativeSSLKeystorePath(conf, nativeSSLKeystorePath);
    if (nativeSSLTruststorePassword != null)
        CqlConfigHelper.setInputNativeSSLTruststorePassword(conf, nativeSSLTruststorePassword);
    if (nativeSSLKeystorePassword != null)
        CqlConfigHelper.setInputNativeSSLKeystorePassword(conf, nativeSSLKeystorePassword);
    if (nativeSSLCipherSuites != null)
        CqlConfigHelper.setInputNativeSSLCipherSuites(conf, nativeSSLCipherSuites);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize));
    if (inputCql != null)
        CqlConfigHelper.setInputCql(conf, inputCql);
    if (columns != null)
        CqlConfigHelper.setInputColumns(conf, columns);
    if (whereClause != null)
        CqlConfigHelper.setInputWhereClauses(conf, whereClause);

    String whereClauseForPartitionFilter = getWhereClauseForPartitionFilter();
    String wc = whereClause != null && !whereClause.trim().isEmpty() 
                           ? whereClauseForPartitionFilter == null ? whereClause: String.format("%s AND %s", whereClause.trim(), whereClauseForPartitionFilter)
                           : whereClauseForPartitionFilter;

    if (wc != null)
    {
        logger.debug("where clause: {}", wc);
        CqlConfigHelper.setInputWhereClauses(conf, wc);
    } 
    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }           
    }

    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}

Source File: WordCount.java From stratio-cassandra with Apache License 2.0

4 votes

public int run(String[] args) throws Exception
{
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR))
    {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
    {
        String columnName = "text" + i;

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem"))
        {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        }
        else
        {
            job.setReducerClass(ReducerToCassandra.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);

            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);

        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        if (i == 4)
        {
            IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("int4"), IndexOperator.EQ, ByteBufferUtil.bytes(0));
            ConfigHelper.setInputRange(job.getConfiguration(), Arrays.asList(expr));
        }

        if (i == 5)
        {
            // this will cause the predicate to be ignored in favor of scanning everything as a wide row
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);
        }

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");

        job.waitForCompletion(true);
    }
    return 0;
}

Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0

4 votes

@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength() : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {

        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    //get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}

Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0

4 votes

public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                        partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);

        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

Source File: CrunchConfigHelper.java From hdfs2cass with Apache License 2.0

2 votes

/**
 * Set the keyspace and column family for the output of this job.
 * <p>
 * Use this instead of
 * {@link org.apache.cassandra.hadoop.ConfigHelper#setOutputColumnFamily(org.apache.hadoop.conf.Configuration, String, String)}
 * </p>
 */
public static void setOutputColumnFamily(Configuration conf, String keyspace, String columnFamily) {
  ConfigHelper.setOutputKeyspace(conf, keyspace);
  setOutputColumnFamily(conf, columnFamily);
}

org.apache.cassandra.hadoop.ConfigHelper Java Examples