org.apache.cassandra.hadoop.HadoopCompat Java Examples
The following examples show how to use
org.apache.cassandra.hadoop.HadoopCompat.
The originating project, source file, and license are noted above each example.
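Before the examples, here is a minimal sketch of the HadoopCompat calls that recur on this page. It is not taken from any of the projects below: the class HadoopCompatSketch and its configure method are hypothetical, and only the HadoopCompat calls themselves are drawn from the examples that follow. HadoopCompat is a small compatibility facade that hides the differences between Hadoop's old (mapred) and new (mapreduce) APIs.

import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical illustration; only the HadoopCompat calls are taken from the examples below.
public class HadoopCompatSketch {
    void configure(Job job, TaskAttemptContext context) {
        // Read the job configuration through the facade instead of calling
        // version-specific getters (as in Examples #5, #6, #10 and #11).
        Configuration conf = HadoopCompat.getConfiguration(job);

        // Report liveness to the framework so long-running writes are not killed
        // (as in Examples #1 and #2).
        HadoopCompat.progress(context);

        // Build a new-API TaskAttemptContext from a plain Configuration,
        // as the old-API adapter in Example #4 does.
        TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(conf, new TaskAttemptID());
    }
}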
Example #1
Source File: CqlBulkRecordWriter.java From stratio-cassandra with Apache License 2.0 | 6 votes |
/**
 * The column values must correspond to the order in which
 * they appear in the insert stored procedure.
 * <p>
 * Key is not used, so it can be null or any object.
 *
 * @param key any object or null.
 * @param values the values to write.
 * @throws IOException
 */
@Override
public void write(Object key, List<ByteBuffer> values) throws IOException
{
    prepareWriter();
    try
    {
        ((CQLSSTableWriter) writer).rawAddRow(values);
        if (null != progress)
            progress.progress();
        if (null != context)
            HadoopCompat.progress(context);
    }
    catch (InvalidRequestException e)
    {
        throw new IOException("Error adding row with key: " + key, e);
    }
}
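The javadoc above spells out the write() contract: the key is ignored and the values must follow the column order of the bound insert statement. The sketch below, which is not part of any project on this page, shows a hypothetical caller honoring that contract; the class BulkWriteSketch, the writeOneRow method, and the column values are illustrative, and the writer is typed through Hadoop's generic RecordWriter interface that the class above overrides.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.mapreduce.RecordWriter;

// Hypothetical caller; 'writer' stands in for the CqlBulkRecordWriter above,
// viewed through Hadoop's RecordWriter interface.
class BulkWriteSketch
{
    void writeOneRow(RecordWriter<Object, List<ByteBuffer>> writer)
            throws IOException, InterruptedException
    {
        List<ByteBuffer> values = Arrays.asList(
                ByteBufferUtil.bytes("row-key-1"), // first column bound by the insert statement
                ByteBufferUtil.bytes(42L));        // second column, illustrative only
        writer.write(null, values);                // key is unused, so null is allowed
    }
}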
Example #2
Source File: CrunchCqlBulkRecordWriter.java From hdfs2cass with Apache License 2.0 | 6 votes |
@Override
public void write(final ByteBuffer ignoredKey, final CQLRecord record) {
    prepareWriter();
    // To ensure Crunch doesn't reuse CQLSSTableWriter's objects
    List<ByteBuffer> bb = Lists.newArrayList();
    for (ByteBuffer v : record.getValues()) {
        bb.add(ByteBufferUtil.clone(v));
    }
    try {
        ((CQLSSTableWriter) writer).rawAddRow(bb);
        if (null != progress)
            progress.progress();
        if (null != context)
            HadoopCompat.progress(context);
    } catch (InvalidRequestException | IOException e) {
        LOG.error(e.getMessage());
        throw new CrunchRuntimeException("Error adding row : " + e.getMessage());
    }
}
Example #3
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 5 votes |
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, Reporter reporter) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(
            jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)),
            null,
            null,
            null,
            new ReporterWrapper(reporter),
            null);

    RecordReaderGrakn recordReader = new RecordReaderGrakn();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
Example #4
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 5 votes |
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newTaskAttemptContext(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    InputSplit[] oldInputSplits = new InputSplit[newInputSplits.size()];
    for (int i = 0; i < newInputSplits.size(); i++) {
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    }
    return oldInputSplits;
}
Example #5
Source File: CqlNativeStorage.java From stratio-cassandra with Apache License 2.0 | 5 votes |
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    initSchema(storeSignature);
}
Example #6
Source File: CrunchBulkRecordWriter.java From hdfs2cass with Apache License 2.0 | 5 votes |
public CrunchBulkRecordWriter(TaskAttemptContext context) {
    Config.setClientMode(true);
    Config.setOutboundBindAny(true);
    this.conf = HadoopCompat.getConfiguration(context);
    this.context = context;
    int megabitsPerSec = Integer.parseInt(conf.get(STREAM_THROTTLE_MBITS, "0"));
    LOG.info("Setting stream throttling to " + megabitsPerSec);
    DatabaseDescriptor.setStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
    DatabaseDescriptor.setInterDCStreamThroughputOutboundMegabitsPerSec(megabitsPerSec);
    heartbeat = new ProgressHeartbeat(context, 120);
}
Example #7
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 4 votes |
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    LOG.trace("partitioner is {}", partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();

    try (CqlSession session = getInputSession(ConfigHelper.getInputInitialAddress(conf).split(","), conf)) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder()) {
                    throw new UnsupportedOperationException("KeyRange based on keys can only be used with a order preserving partitioner");
                }
                if (jobKeyRange.start_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                if (jobKeyRange.end_token != null) {
                    throw new IllegalArgumentException("only start_key supported");
                }
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                                       partitioner.getToken(jobKeyRange.end_key));
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                                       partitioner.getTokenFactory().fromString(jobKeyRange.end_token));
            } else {
                LOG.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        Metadata metadata = session.getMetadata();

        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Node>> masterRangeNodes = getRangeMap(keyspace, metadata);

        for (TokenRange range : masterRangeNodes.keySet()) {
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, masterRangeNodes.get(range), conf, session)));
            } else {
                TokenRange jobTokenRange = rangeToTokenRange(metadata, jobRange);
                if (range.intersects(jobTokenRange)) {
                    for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                        // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(intersection, masterRangeNodes.get(range), conf, session)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
Example #8
Source File: InputFormatGrakn.java From grakn with GNU Affero General Public License v3.0 | 4 votes |
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = conf.get(INPUT_CQL_COLUMNS_CONFIG);
    userDefinedWhereClauses = conf.get(INPUT_CQL_WHERE_CLAUSE_CONFIG);

    try {
        // create a Cluster instance
        String[] locations = split.getLocations();
        session = getInputSession(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // get negotiated serialization protocol
    nativeProtocolVersion = session.getContext().getProtocolVersion().getCode();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = conf.get(INPUT_CQL);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    LOG.trace("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    LOG.trace("created {}", rowIterator);
}
Example #9
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0 | 4 votes |
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException
{
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
                  ? (int) this.split.getLength()
                  : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try
    {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't tried to give us a custom query along with input columns
    // and where clauses
    if (StringUtils.isNotEmpty(cqlQuery) && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses)))
    {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();

    logger.debug("cqlQuery {}", cqlQuery);
    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
Example #10
Source File: CassandraStorage.java From stratio-cassandra with Apache License 2.0 | 4 votes |
/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);
    setLocationFromUri(location);

    if (ConfigHelper.getInputSlicePredicate(conf) == null)
    {
        SliceRange range = new SliceRange(slice_start, slice_end, slice_reverse, limit);
        SlicePredicate predicate = new SlicePredicate().setSlice_range(range);
        ConfigHelper.setInputSlicePredicate(conf, predicate);
    }
    if (System.getenv(PIG_WIDEROW_INPUT) != null)
        widerows = Boolean.parseBoolean(System.getenv(PIG_WIDEROW_INPUT));
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));
    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null)
    {
        try
        {
            ConfigHelper.setInputSplitSize(conf, Integer.parseInt(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        }
        catch (NumberFormatException e)
        {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }
    }

    if (usePartitionFilter && getIndexExpressions() != null)
        ConfigHelper.setInputRange(conf, getIndexExpressions());

    if (username != null && password != null)
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family, widerows);
    setConnectionInformation();

    if (ConfigHelper.getInputRpcPort(conf) == 0)
        throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}
Example #11
Source File: CassandraStorage.java From stratio-cassandra with Apache License 2.0 | 4 votes |
/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException
{
    conf = HadoopCompat.getConfiguration(job);

    // don't combine mappers to a single mapper per node
    conf.setBoolean("pig.noSplitCombination", true);

    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
    {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null)
    {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    // we have to do this again here for the check in writeColumnsFromTuple
    if (System.getenv(PIG_USE_SECONDARY) != null)
        usePartitionFilter = Boolean.parseBoolean(System.getenv(PIG_USE_SECONDARY));

    initSchema(storeSignature);
}
Example #12
Source File: CqlRecordWriter.java From stratio-cassandra with Apache License 2.0 | 2 votes |
/**
 * Upon construction, obtain the map that this writer will use to collect
 * mutations, and the ring cache for the given keyspace.
 *
 * @param context the task attempt context
 * @throws IOException
 */
CqlRecordWriter(TaskAttemptContext context) throws IOException
{
    this(HadoopCompat.getConfiguration(context));
    this.context = context;
}