org.apache.cassandra.hadoop.HadoopCompat Java Examples

The following examples show how to use org.apache.cassandra.hadoop.HadoopCompat. They are taken from several open-source projects; the originating source file, project, and license are listed above each example.
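HadoopCompat is a small compatibility shim in the org.apache.cassandra.hadoop package that papers over differences between the Hadoop 1.x and 2.x mapreduce APIs, where types such as TaskAttemptContext changed from classes to interfaces. The sketch below is a minimal, hypothetical RecordWriter showing the pattern most of the examples share: obtain the Configuration and report progress through HadoopCompat instead of calling the context directly. The class name and generic types are illustrative only and not taken from any of the projects below.

import java.io.IOException;

import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: the same HadoopCompat calls used throughout the examples below.
public class SketchRecordWriter extends RecordWriter<Object, String>
{
    private final Configuration conf;
    private final TaskAttemptContext context;

    SketchRecordWriter(TaskAttemptContext context)
    {
        // Compiles against Hadoop 1.x and 2.x, where getConfiguration() lives on different types.
        this.conf = HadoopCompat.getConfiguration(context);
        this.context = context;
    }

    @Override
    public void write(Object key, String value) throws IOException
    {
        // ... write the value to the underlying store ...
        if (null != context)
            HadoopCompat.progress(context); // keep the task attempt alive during long writes
    }

    @Override
    public void close(TaskAttemptContext context)
    {
        // nothing to release in this sketch
    }
}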
Example #1
Source File: CqlBulkRecordWriter.java    From stratio-cassandra with Apache License 2.0
/**
 * The column values must correspond to the order in which
 * they appear in the insert stored procedure.
 * <p>
 * Key is not used, so it can be null or any object.
 *
 * @param key
 *            any object or null.
 * @param values
 *            the values to write.
 * @throws IOException
 */
@Override
public void write(Object key, List<ByteBuffer> values) throws IOException
{
    prepareWriter();
    try
    {
        ((CQLSSTableWriter) writer).rawAddRow(values);
        
        if (null != progress)
            progress.progress();
        if (null != context)
            HadoopCompat.progress(context);
    } 
    catch (InvalidRequestException e)
    {
        throw new IOException("Error adding row with key: " + key, e);
    }
}
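
As a rough usage illustration of the write method above (a sketch, not from the project: recordWriter is assumed to have been obtained from the bulk output format, and the column names and values are made up), the key argument can simply be null:

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.cassandra.utils.ByteBufferUtil;

List<ByteBuffer> values = Arrays.asList(
        ByteBufferUtil.bytes("some-partition-key"), // must follow the column order of the configured INSERT
        ByteBufferUtil.bytes(42),
        ByteBufferUtil.bytes("some-text-value"));
recordWriter.write(null, values); // key is unused, so null is fine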
 
Example #2
Source File: CrunchCqlBulkRecordWriter.java    From hdfs2cass with Apache License 2.0
@Override
public void write(final ByteBuffer ignoredKey, final CQLRecord record)  {
  prepareWriter();
  // To ensure Crunch doesn't reuse CQLSSTableWriter's objects
  List<ByteBuffer> bb = Lists.newArrayList();
  for (ByteBuffer v : record.getValues()) {
    bb.add(ByteBufferUtil.clone(v));
  }
  try {
    ((CQLSSTableWriter) writer).rawAddRow(bb);
    if (null != progress)
      progress.progress();
    if (null != context)
      HadoopCompat.progress(context);
  } catch (InvalidRequestException | IOException e) {
    LOG.error(e.getMessage());
    throw new CrunchRuntimeException("Error adding row : " + e.getMessage());
  }
}
 
Example #3
Source File: InputFormatGrakn.java    From grakn with GNU Affero General Public License v3.0
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, Reporter reporter) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(
            jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)),
            null,
            null,
            null,
            new ReporterWrapper(reporter),
            null);

    RecordReaderGrakn recordReader = new RecordReaderGrakn();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
 
Example #4
Source File: InputFormatGrakn.java    From grakn with GNU Affero General Public License v3.0
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    InputSplit[] oldInputSplits = new InputSplit[newInputSplits.size()];
    for (int i = 0; i < newInputSplits.size(); i++) {
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    }
    return oldInputSplits;
}
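
The cast in the loop only compiles because ColumnFamilySplit satisfies both split contracts: it comes back from the new-API getSplits(JobContext) as an org.apache.hadoop.mapreduce.InputSplit and is stored here in an old-API org.apache.hadoop.mapred.InputSplit array. A legacy mapred driver can therefore consume the adapter directly; a minimal sketch (inputFormat is an instance of the surrounding input format class, and the jobConf wiring is omitted and assumed to follow the ConfigHelper calls shown in the later examples):

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;

JobConf jobConf = new JobConf();
// ... ConfigHelper.setInputInitialAddress / setInputPartitioner / etc. ...
InputSplit[] splits = inputFormat.getSplits(jobConf, 0); // the numSplits hint is ignored by this adapter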
 
Example #5
Source File: CqlNativeStorage.java    From stratio-cassandra with Apache License 2.0
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    initSchema(storeSignature);
}
 
Example #6
Source File: CrunchBulkRecordWriter.java    From hdfs2cass with Apache License 2.0
public CrunchBulkRecordWriter(TaskAttemptContext context) {
  Config.setClientMode(true);
  Config.setOutboundBindAny(true);
  this.conf = HadoopCompat.getConfiguration(context);
  this.context = context;
  int megabitsPerSec = Integer.parseInt(conf.get(STREAM_THROTTLE_MBITS, "0"));
  LOG.info("Setting stream throttling to " + megabitsPerSec);
  DatabaseDescriptor.setStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
  DatabaseDescriptor.setInterDCStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
  heartbeat = new ProgressHeartbeat(context, 120);
}
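
The throttle above is read from the job Configuration under the key named by the STREAM_THROTTLE_MBITS constant, with "0" as the default. Assuming that constant is visible to the job driver, the cap could be set when the job is configured; a sketch, not taken from the project:

import org.apache.hadoop.conf.Configuration;

Configuration conf = job.getConfiguration(); // job is assumed to be the Hadoop/Crunch job being set up
conf.set(STREAM_THROTTLE_MBITS, "400");      // cap outbound SSTable streaming at roughly 400 megabits per second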
 
Example #7
Source File: InputFormatGrakn.java    From grakn with GNU Affero General Public License v3.0
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                        partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);

        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
 
Example #8
Source File: InputFormatGrakn.java    From grakn with GNU Affero General Public License v3.0
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength() : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {

        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    //get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
 
Example #9
Source File: CqlRecordReader.java    From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
      throw new RuntimeException("Can't create connection session");

    //get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) ||
                                             StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
 
Example #10
Source File: CassandraStorage.java    From stratio-cassandra with Apache License 2.0
/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (ConfigHelper.getInputSlicePredicate(conf) == null)
    {
        SliceRange range = new SliceRange(slice_start, slice_end, slice_reverse, limit);
        SlicePredicate predicate = new SlicePredicate().setSlice_range(range);
        ConfigHelper.setInputSlicePredicate(conf, predicate);
    }
    if (System.getenv(PIG_WIDEROW_INPUT) != null)
        widerows = Boolean.parseBoolean(System.getenv(PIG_WIDEROW_INPUT));
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));
    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }           
    } 

    if (usePartitionFilter && getIndexExpressions() != null)
        ConfigHelper.setInputRange(conf, getIndexExpressions());

    if (username != null && password != null)
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);

    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family, widerows);
    setConnectionInformation();

    if (ConfigHelper.getInputRpcPort(conf) == 0)
        throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;
    initSchema(loadSignature);
}
 
Example #11
Source File: CassandraStorage.java    From stratio-cassandra with Apache License 2.0
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    
    // don't combine mappers to a single mapper per node
    conf.setBoolean("pig.noSplitCombination", true);
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    // we have to do this again here for the check in writeColumnsFromTuple
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));

    initSchema(storeSignature);
}
 
Example #12
Source File: CqlRecordWriter.java    From stratio-cassandra with Apache License 2.0
/**
 * Upon construction, obtain the map that this writer will use to collect
 * mutations, and the ring cache for the given keyspace.
 *
 * @param context the task attempt context
 * @throws IOException
 */
CqlRecordWriter(TaskAttemptContext context) throws IOException
{
    this(HadoopCompat.getConfiguration(context));
    this.context = context;
}