Java Code Examples for org.apache.hadoop.mapred.JobConf#getInputFormat()
The following examples show how to use
org.apache.hadoop.mapred.JobConf#getInputFormat().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Creates the Hadoop {@link InputFormat} instance configured for the given table.
 *
 * <p>A new {@link JobConf} is built from {@code hiveConf}, the class returned by
 * {@code getInputFormatClass} is registered on it, and the instance produced by
 * {@link JobConf#getInputFormat()} is returned. All of this happens inside a
 * {@code ContextClassLoaderSwapper} try-with-resources scope, which is closed on exit.
 *
 * @param table the table whose input format is requested
 * @param hiveConf the configuration used to seed the job configuration
 * @return the input format instance obtained from the job configuration
 */
public static InputFormat<?, ?> getInputFormat(Table table, final HiveConf hiveConf) {
  try (final ContextClassLoaderSwapper swapper = ContextClassLoaderSwapper.newInstance()) {
    final JobConf jobConf = new JobConf(hiveConf);
    final Class<? extends InputFormat> formatClass = getInputFormatClass(jobConf, table, null);
    jobConf.setInputFormat(formatClass);
    final InputFormat<?, ?> format = jobConf.getInputFormat();
    return format;
  }
}
Example 2
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Resolves and instantiates the {@link InputFormat} for a table.
 *
 * <p>The input format class is looked up through {@code getInputFormatClass}, set on a
 * {@link JobConf} copied from {@code hiveConf}, and then instantiated through
 * {@link JobConf#getInputFormat()}. The body runs within a {@code ContextClassLoaderSwapper}
 * resource that is closed automatically when the method returns or throws.
 *
 * @param table the table to resolve the input format for
 * @param hiveConf the source configuration for the temporary {@link JobConf}
 * @return the instantiated input format
 */
public static InputFormat<?, ?> getInputFormat(Table table, final HiveConf hiveConf) {
  try (final ContextClassLoaderSwapper classLoaderScope = ContextClassLoaderSwapper.newInstance()) {
    final JobConf conf = new JobConf(hiveConf);
    final Class<? extends InputFormat> resolvedClass = getInputFormatClass(conf, table, null);
    conf.setInputFormat(resolvedClass);
    return conf.getInputFormat();
  }
}
Example 3
Source File: AbstractEvaluatorToPartitionStrategy.java From reef with Apache License 2.0 | 5 votes |
@SuppressWarnings("rawtypes") AbstractEvaluatorToPartitionStrategy( final String inputFormatClassName, final Set<String> serializedDataPartitions) { LOG.fine("AbstractEvaluatorToPartitionStrategy injected"); Validate.notEmpty(inputFormatClassName); Validate.notEmpty(serializedDataPartitions); locationToSplits = new ConcurrentHashMap<>(); evaluatorToSplits = new ConcurrentHashMap<>(); unallocatedSplits = new LinkedBlockingQueue<>(); setUp(); final Map<DistributedDataSetPartition, InputSplit[]> splitsPerPartition = new HashMap<>(); for (final String serializedDataPartition : serializedDataPartitions) { final DistributedDataSetPartition dp = DistributedDataSetPartitionSerializer.deserialize(serializedDataPartition); final ExternalConstructor<JobConf> jobConfExternalConstructor = new JobConfExternalConstructor( inputFormatClassName, dp.getPath()); try { final JobConf jobConf = jobConfExternalConstructor.newInstance(); final InputFormat inputFormat = jobConf.getInputFormat(); final InputSplit[] inputSplits = inputFormat.getSplits(jobConf, dp.getDesiredSplits()); if (LOG.isLoggable(Level.FINEST)) { LOG.log(Level.FINEST, "Splits for partition: {0} {1}", new Object[] {dp, Arrays.toString(inputSplits)}); } this.totalNumberOfSplits += inputSplits.length; splitsPerPartition.put(dp, inputSplits); } catch (final IOException e) { throw new RuntimeException("Unable to get InputSplits using the specified InputFormat", e); } } init(splitsPerPartition); LOG.log(Level.FINE, "Total Number of splits: {0}", this.totalNumberOfSplits); }
Example 4
Source File: InputSampler.java From RDFS with Apache License 2.0 | 5 votes |
/** * Write a partition file for the given job, using the Sampler provided. * Queries the sampler for a sample keyset, sorts by the output key * comparator, selects the keys for each rank, and writes to the destination * returned from {@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}. */ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K,V> void writePartitionFile(JobConf job, Sampler<K,V> sampler) throws IOException { final InputFormat<K,V> inf = (InputFormat<K,V>) job.getInputFormat(); int numPartitions = job.getNumReduceTasks(); K[] samples = sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job)); FileSystem fs = dst.getFileSystem(job); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for(int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
Example 5
Source File: MRHelpers.java From incubator-tez with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Private public static org.apache.hadoop.mapred.InputSplit[] generateOldSplits( JobConf jobConf, String inputFormatName, int numTasks) throws IOException { org.apache.hadoop.mapred.InputFormat inputFormat = jobConf.getInputFormat(); org.apache.hadoop.mapred.InputFormat finalInputFormat = inputFormat; if (inputFormatName != null && !inputFormatName.isEmpty()) { if (!inputFormat.getClass().equals( org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class)){ throw new TezUncheckedException( "Expected " + org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName() + " in conf but got: " + inputFormat.getClass().getName()); } try { inputFormat = (org.apache.hadoop.mapred.InputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatName), jobConf); } catch (ClassNotFoundException e) { throw new TezUncheckedException(e); } org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat groupedFormat = new org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat(); groupedFormat.setConf(jobConf); groupedFormat.setInputFormat(inputFormat); groupedFormat.setDesiredNumberOfSplits(numTasks); finalInputFormat = groupedFormat; } org.apache.hadoop.mapred.InputSplit[] splits = finalInputFormat .getSplits(jobConf, jobConf.getNumMapTasks()); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(splits, new OldInputSplitComparator()); return splits; }
Example 6
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) private static org.apache.hadoop.mapred.InputSplit[] generateOldSplits( JobConf jobConf, boolean groupSplits, boolean sortSplits, int numTasks) throws IOException { // This is the real InputFormat org.apache.hadoop.mapred.InputFormat inputFormat; try { inputFormat = jobConf.getInputFormat(); } catch (Exception e) { throw new TezUncheckedException(e); } org.apache.hadoop.mapred.InputFormat finalInputFormat = inputFormat; if (groupSplits) { org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat groupedFormat = new org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat(); groupedFormat.setConf(jobConf); groupedFormat.setInputFormat(inputFormat); groupedFormat.setDesiredNumberOfSplits(numTasks); finalInputFormat = groupedFormat; } else { finalInputFormat = inputFormat; } org.apache.hadoop.mapred.InputSplit[] splits = finalInputFormat .getSplits(jobConf, jobConf.getNumMapTasks()); if (sortSplits) { // sort the splits into order based on size, so that the biggest // go first Arrays.sort(splits, new OldInputSplitComparator()); } return splits; }
Example 7
Source File: InputSampler.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** * Write a partition file for the given job, using the Sampler provided. * Queries the sampler for a sample keyset, sorts by the output key * comparator, selects the keys for each rank, and writes to the destination * returned from {@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}. */ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K,V> void writePartitionFile(JobConf job, Sampler<K,V> sampler) throws IOException { final InputFormat<K,V> inf = (InputFormat<K,V>) job.getInputFormat(); int numPartitions = job.getNumReduceTasks(); K[] samples = sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job)); FileSystem fs = dst.getFileSystem(job); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for(int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
Example 8
Source File: InputFormatExternalConstructor.java From reef with Apache License 2.0 | 4 votes |
/**
 * Stores the injected job configuration and the input format obtained from it.
 *
 * @param jobConf the job configuration; its {@code getInputFormat()} result is kept in
 *     {@code inputFormat}
 */
@Inject
public InputFormatExternalConstructor(final JobConf jobConf) {
  this.jobConf = jobConf;
  this.inputFormat = jobConf.getInputFormat();
}