org.apache.cassandra.hadoop.HadoopCompat Java Examples
The following examples show how to use
org.apache.cassandra.hadoop.HadoopCompat.
The originating project, source file, and license are noted above each example.
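Before the examples, here is a minimal sketch of the HadoopCompat calls that recur on this page. It is not taken from any of the projects below: the class HadoopCompatSketch and its configure method are hypothetical, and only the HadoopCompat calls themselves are drawn from the examples that follow. HadoopCompat is a small compatibility facade that hides the differences between Hadoop's old (mapred) and new (mapreduce) APIs.

import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical illustration; only the HadoopCompat calls are taken from the examples below.
public class HadoopCompatSketch {
    void configure(Job job, TaskAttemptContext context) {
        // Read the job configuration through the facade instead of calling
        // version-specific getters (as in Examples #5, #6, #10 and #11).
        Configuration conf = HadoopCompat.getConfiguration(job);

        // Report liveness to the framework so long-running writes are not killed
        // (as in Examples #1 and #2).
        HadoopCompat.progress(context);

        // Build a new-API TaskAttemptContext from a plain Configuration,
        // as the old-API adapter in Example #4 does.
        TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(conf, new TaskAttemptID());
    }
}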
Example #1
Source File: CqlBulkRecordWriter.java From stratio-cassandra with Apache License 2.0 | 6 votes |
/**
 * The column values must correspond to the order in which
 * they appear in the insert stored procedure.
 * <p>
 * Key is not used, so it can be null or any object.
 *
 * @param key any object or null.
 * @param values the values to write.
 * @throws IOException
 */
@Override
public void write(Object key, List<ByteBuffer> values) throws IOException
{
    prepareWriter();
    try
    {
        ((CQLSSTableWriter) writer).rawAddRow(values);
        if (null != progress)
            progress.progress();
        if (null != context)
            HadoopCompat.progress(context);
    }
    catch (InvalidRequestException e)
    {
        throw new IOException("Error adding row with key: " + key, e);
    }
}
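The javadoc above spells out the write() contract: the key is ignored and the values must follow the column order of the bound insert statement. The sketch below, which is not part of any project on this page, shows a hypothetical caller honoring that contract; the class BulkWriteSketch, the writeOneRow method, and the column values are illustrative, and the writer is typed through Hadoop's generic RecordWriter interface that the class above overrides.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.mapreduce.RecordWriter;

// Hypothetical caller; 'writer' stands in for the CqlBulkRecordWriter above,
// viewed through Hadoop's RecordWriter interface.
class BulkWriteSketch
{
    void writeOneRow(RecordWriter<Object, List<ByteBuffer>> writer)
            throws IOException, InterruptedException
    {
        List<ByteBuffer> values = Arrays.asList(
                ByteBufferUtil.bytes("row-key-1"), // first column bound by the insert statement
                ByteBufferUtil.bytes(42L));        // second column, illustrative only
        writer.write(null, values);                // key is unused, so null is allowed
    }
}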
Example #2
Source File: CrunchCqlBulkRecordWriter.java From hdfs2cass with Apache License 2.0 | 6 votes |
@Override
public void write(final ByteBuffer ignoredKey, final CQLRecord record) {
    prepareWriter();
    // To ensure Crunch doesn't reuse CQLSSTableWriter's objects
    List<ByteBuffer> bb = Lists.newArrayList();
    for (ByteBuffer v : record.getValues()) {
        bb.add(ByteBufferUtil.clone(v));
    }
    try {
        ((CQLSSTableWriter) writer).rawAddRow(bb);
        if (null != progress)
            progress.progress();
        if (null != context)
            HadoopCompat.progress(context);
    } catch (InvalidRequestException | IOException e) {
        LOG.error(e.getMessage());
        throw new CrunchRuntimeException("Error adding row : " + e.getMessage());
    }
}
Example #3
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 5 votes |
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, Reporter reporter) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(
            jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)),
            null,
            null,
            null,
            new ReporterWrapper(reporter),
            null);

    RecordReaderGrakn recordReader = new RecordReaderGrakn();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
Example #4
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 5 votes |
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    InputSplit[] oldInputSplits = new InputSplit[newInputSplits.size()];
    for (int i = 0; i < newInputSplits.size(); i++) {
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    }
    return oldInputSplits;
}
Example #5
Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0 | 5 votes |
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    initSchema(storeSignature);
}
Example #6
Source File: CrunchBulkRecordWriter.java From hdfs2cass with Apache License 2.0 | 5 votes |
public CrunchBulkRecordWriter(TaskAttemptContext context) {
    Config.setClientMode(true);
    Config.setOutboundBindAny(true);
    this.conf = HadoopCompat.getConfiguration(context);
    this.context = context;
    int megabitsPerSec = Integer.parseInt(conf.get(STREAM_THROTTLE_MBITS, "0"));
    LOG.info("Setting stream throttling to " + megabitsPerSec);
    DatabaseDescriptor.setStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
    DatabaseDescriptor.setInterDCStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
    heartbeat = new ProgressHeartbeat(context, 120);
}
Example #7
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 4 votes |
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                                       partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                                       partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);

        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
Example #8
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 4 votes |
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {
        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
Example #9
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0 | 4 votes |
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();

    logger.debug("cqlQuery {}", cqlQuery);
    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
Example #10
Source File: CassandraStorage.java From stratio-cassandra with Apache License 2.0 | 4 votes |
/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (ConfigHelper.getInputSlicePredicate(conf) == null)
    {
        SliceRange range = new SliceRange(slice_start, slice_end, slice_reverse, limit);
        SlicePredicate predicate = new SlicePredicate().setSlice_range(range);
        ConfigHelper.setInputSlicePredicate(conf, predicate);
    }
    if (System.getenv(PIG_WIDEROW_INPUT) != null)
        widerows = Boolean.parseBoolean(System.getenv(PIG_WIDEROW_INPUT));
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));
    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }
    }

    if (usePartitionFilter && getIndexExpressions() != null)
        ConfigHelper.setInputRange(conf, getIndexExpressions());

    if (username != null && password != null)
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family, widerows);
    setConnectionInformation();

    if (ConfigHelper.getInputRpcPort(conf) == 0)
        throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}
Example #11
Source File: CassandraStorage.java From stratio-cassandra with Apache License 2.0 | 4 votes |
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);

    // don't combine mappers to a single mapper per node
    conf.setBoolean("pig.noSplitCombination", true);

    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    // we have to do this again here for the check in writeColumnsFromTuple
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));

    initSchema(storeSignature);
}
Example #12
Source File: CqlRecordWriter.java From stratio-cassandra with Apache License 2.0 | 2 votes |
/**
 * Upon construction, obtain the map that this writer will use to collect
 * mutations, and the ring cache for the given keyspace.
 *
 * @param context the task attempt context
 * @throws IOException
 */
CqlRecordWriter(TaskAttemptContext context) throws IOException
{
    this(HadoopCompat.getConfiguration(context));
    this.context = context;
}