Java Code Examples for org.apache.hadoop.mapreduce.InputSplit#getLength()
The following examples show how to use
org.apache.hadoop.mapreduce.InputSplit#getLength().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: kylin-on-parquet-v2 File: AbstractHadoopJob.java License: Apache License 2.0 | 6 votes |
public static double getTotalMapInputMB(Job job) throws ClassNotFoundException, IOException, InterruptedException, JobException { if (job == null) { throw new JobException("Job is null"); } long mapInputBytes = 0; InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration()); for (InputSplit split : input.getSplits(job)) { mapInputBytes += split.getLength(); } // 0 input bytes is possible when the segment range hits no partition on a partitioned hive table (KYLIN-2470) if (mapInputBytes == 0) { logger.warn("Map input splits are 0 bytes, something is wrong?"); } double totalMapInputMB = (double) mapInputBytes / 1024 / 1024; return totalMapInputMB; }
Example 2
Source Project: spork File: MapRedUtil.java License: Apache License 2.0 | 6 votes |
public String inputSplitToString(InputSplit[] splits) throws IOException, InterruptedException { // debugging purpose only StringBuilder st = new StringBuilder(); st.append("Number of splits :" + splits.length+"\n"); long len = 0; for (InputSplit split: splits) len += split.getLength(); st.append("Total Length = "+ len +"\n"); for (int i = 0; i < splits.length; i++) { st.append("Input split["+i+"]:\n Length = "+ splits[i].getLength()+"\n Locations:\n"); for (String location : splits[i].getLocations()) st.append(" "+location+"\n"); st.append("\n-----------------------\n"); } return st.toString(); }
Example 3
Source Project: kylin File: AbstractHadoopJob.java License: Apache License 2.0 | 6 votes |
public static double getTotalMapInputMB(Job job) throws ClassNotFoundException, IOException, InterruptedException, JobException { if (job == null) { throw new JobException("Job is null"); } long mapInputBytes = 0; InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration()); for (InputSplit split : input.getSplits(job)) { mapInputBytes += split.getLength(); } // 0 input bytes is possible when the segment range hits no partition on a partitioned hive table (KYLIN-2470) if (mapInputBytes == 0) { logger.warn("Map input splits are 0 bytes, something is wrong?"); } double totalMapInputMB = (double) mapInputBytes / 1024 / 1024; return totalMapInputMB; }
Example 4
Source Project: Kylin File: AbstractHadoopJob.java License: Apache License 2.0 | 6 votes |
protected double getTotalMapInputMB() throws ClassNotFoundException, IOException, InterruptedException, JobException { if (job == null) { throw new JobException("Job is null"); } long mapInputBytes = 0; InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration()); for (InputSplit split : input.getSplits(job)) { mapInputBytes += split.getLength(); } if (mapInputBytes == 0) { throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!"); } double totalMapInputMB = (double) mapInputBytes / 1024 / 1024; return totalMapInputMB; }
Example 5
Source Project: marklogic-contentpump File: AggregateXMLReader.java License: Apache License 2.0 | 6 votes |
protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException { start = 0; end = inSplit.getLength(); overflow = false; fInputStream = openFile(inSplit, true); if (fInputStream == null) { return; } try { xmlSR = f.createXMLStreamReader(fInputStream, encoding); } catch (XMLStreamException e) { LOG.error(e.getMessage(), e); } if (useAutomaticId) { idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart()); } }
Example 6
Source Project: marklogic-contentpump File: DelimitedTextReader.java License: Apache License 2.0 | 6 votes |
protected void initParser(InputSplit inSplit) throws IOException, InterruptedException { fileIn = openFile(inSplit, true); if (fileIn == null) { return; } instream = new InputStreamReader(fileIn, encoding); bytesRead = 0; fileLen = inSplit.getLength(); if (uriName == null) { generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false); if (generateId) { idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart()); } else { uriId = 0; } } parser = new CSVParser(instream, CSVParserFormatter. getFormat(delimiter, encapsulator, true, true)); parserIterator = parser.iterator(); }
Example 7
Source Project: beam File: HadoopFormatIO.java License: Apache License 2.0 | 6 votes |
/** * This is a helper function to compute splits. This method will also calculate size of the data * being read. Note: This method is executed exactly once and the splits are retrieved and * cached in this. These splits are further used by split() and getEstimatedSizeBytes(). */ @VisibleForTesting void computeSplitsIfNecessary() throws IOException, InterruptedException { if (inputSplits != null) { return; } createInputFormatInstance(); List<InputSplit> splits = inputFormatObj.getSplits(Job.getInstance(conf.get())); if (splits == null) { throw new IOException("Error in computing splits, getSplits() returns null."); } if (splits.isEmpty()) { throw new IOException("Error in computing splits, getSplits() returns a empty list"); } boundedSourceEstimatedSize = 0; inputSplits = new ArrayList<>(); for (InputSplit inputSplit : splits) { if (inputSplit == null) { throw new IOException( "Error in computing splits, split is null in InputSplits list " + "populated by getSplits() : "); } boundedSourceEstimatedSize += inputSplit.getLength(); inputSplits.add(new SerializableSplit(inputSplit)); } }
Example 8
Source Project: marklogic-contentpump File: BinaryLoader.java License: Apache License 2.0 | 6 votes |
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { bytesTotal = inSplit.getLength(); Path file = ((FileSplit)inSplit).getPath(); FileSystem fs = file.getFileSystem(context.getConfiguration()); FSDataInputStream fileIn = fs.open(file); key.set(file.toString()); byte[] buf = new byte[(int)inSplit.getLength()]; System.out.println("split length: " + inSplit.getLength()); try { fileIn.readFully(buf); value.set(buf, 0, (int) inSplit.getLength()); System.out.println("value length: " + value.getBytes().length); hasNext = true; } catch (Exception e) { hasNext = false; } finally { fileIn.close(); } }
Example 9
Source Project: datawave File: CSVReaderBase.java License: Apache License 2.0 | 5 votes |
public void initializeTotalSize(final InputSplit genericSplit) throws IOException { try { totalSize = genericSplit.getLength() * 4l; } catch (InterruptedException ex) { throw new IOException("Interrupted Exception thrown while attempting to get split length", ex); } }
Example 10
Source Project: datawave File: TabletSplitSplit.java License: Apache License 2.0 | 5 votes |
/** * Add an InputSplit to this collection. * * @throws IOException * If capacity was not specified during construction or if capacity has been reached. * @throws InterruptedException */ public void add(InputSplit s) throws IOException, InterruptedException { if (null == splits) { throw new IOException("Uninitialized InputSplit"); } if (fill == splits.length) { throw new IOException("Too many splits"); } splits[fill++] = s; totsize += s.getLength(); }
Example 11
Source Project: hadoop File: CompositeInputSplit.java License: Apache License 2.0 | 5 votes |
/** * Add an InputSplit to this collection. * @throws IOException If capacity was not specified during construction * or if capacity has been reached. */ public void add(InputSplit s) throws IOException, InterruptedException { if (null == splits) { throw new IOException("Uninitialized InputSplit"); } if (fill == splits.length) { throw new IOException("Too many splits"); } splits[fill++] = s; totsize += s.getLength(); }
Example 12
Source Project: hadoop File: SequenceFileAsBinaryInputFormat.java License: Apache License 2.0 | 5 votes |
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Path path = ((FileSplit)split).getPath(); Configuration conf = context.getConfiguration(); FileSystem fs = path.getFileSystem(conf); this.in = new SequenceFile.Reader(fs, path, conf); this.end = ((FileSplit)split).getStart() + split.getLength(); if (((FileSplit)split).getStart() > in.getPosition()) { in.sync(((FileSplit)split).getStart()); // sync to start } this.start = in.getPosition(); vbytes = in.createValueBytes(); done = start >= end; }
Example 13
Source Project: incubator-tez File: TezGroupedSplit.java License: Apache License 2.0 | 5 votes |
public void addSplit(InputSplit split) { wrappedSplits.add(split); try { length += split.getLength(); } catch (Exception e) { throw new TezUncheckedException(e); } }
Example 14
Source Project: marklogic-contentpump File: RDFReader.java License: Apache License 2.0 | 5 votes |
protected void initStream(InputSplit inSplit) throws IOException, InterruptedException { FSDataInputStream in = openFile(inSplit, false); if (in == null) { return; } long size = inSplit.getLength(); initParser(file.toUri().toASCIIString(), size); parse(file.getName(), in); }
Example 15
Source Project: marklogic-contentpump File: DelimitedJSONReader.java License: Apache License 2.0 | 5 votes |
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { /* Initialization in super class */ initConfig(context); /* Get file(s) in input split */ setFile(((FileSplit) inSplit).getPath()); // Initialize reader properties generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI,false); if (generateId){ idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart()); } else { uriName = conf.get(CONF_INPUT_URI_ID, null); mapper = new ObjectMapper(); } bytesRead = 0; totalBytes = inSplit.getLength(); /* Check file status */ fs = file.getFileSystem(context.getConfiguration()); FileStatus status = fs.getFileStatus(file); if (status.isDirectory()) { iterator = new FileIterator((FileSplit)inSplit, context); inSplit = iterator.next(); } /* Initialize buffered reader */ initFileStream(inSplit); }
Example 16
Source Project: big-c File: CompositeInputSplit.java License: Apache License 2.0 | 5 votes |
/** * Add an InputSplit to this collection. * @throws IOException If capacity was not specified during construction * or if capacity has been reached. */ public void add(InputSplit s) throws IOException, InterruptedException { if (null == splits) { throw new IOException("Uninitialized InputSplit"); } if (fill == splits.length) { throw new IOException("Too many splits"); } splits[fill++] = s; totsize += s.getLength(); }
Example 17
Source Project: big-c File: SequenceFileAsBinaryInputFormat.java License: Apache License 2.0 | 5 votes |
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Path path = ((FileSplit)split).getPath(); Configuration conf = context.getConfiguration(); FileSystem fs = path.getFileSystem(conf); this.in = new SequenceFile.Reader(fs, path, conf); this.end = ((FileSplit)split).getStart() + split.getLength(); if (((FileSplit)split).getStart() > in.getPosition()) { in.sync(((FileSplit)split).getStart()); // sync to start } this.start = in.getPosition(); vbytes = in.createValueBytes(); done = start >= end; }
Example 18
Source Project: big-c File: JobSplit.java License: Apache License 2.0 | 5 votes |
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException { try { this.locations = split.getLocations(); this.inputDataLength = split.getLength(); this.startOffset = startOffset; } catch (InterruptedException ie) { throw new IOException(ie); } }
Example 19
Source Project: marklogic-contentpump File: WikiLoader.java License: Apache License 2.0 | 4 votes |
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { Path file = ((FileSplit)inSplit).getPath(); FileSystem fs = file.getFileSystem(context.getConfiguration()); FSDataInputStream fileIn = fs.open(file); byte[] buf = new byte[BUFFER_SIZE]; long bytesTotal = inSplit.getLength(); long start = ((FileSplit)inSplit).getStart(); fileIn.seek(start); long bytesRead = 0; StringBuilder pages = new StringBuilder(); int sindex = -1; while (true) { int length = (int)Math.min(bytesTotal - bytesRead, buf.length); int read = fileIn.read(buf, 0, length); if (read == -1) { System.out.println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead); break; } bytesRead += read; String temp = new String(new String(buf, 0, read)); if (sindex == -1) { // haven't found the start yet sindex = temp.indexOf(BEGIN_PAGE_TAG); if (sindex > -1) { pages.append(temp.substring(sindex)); } } else if (bytesRead < bytesTotal) { // haven't completed the split pages.append(temp); } else { // reached the end of this split // look for end int eindex = 0; if (temp.contains(END_DOC_TAG) || // reached the end of doc temp.endsWith(END_PAGE_TAG)) { eindex = temp.lastIndexOf(END_PAGE_TAG); pages.append(temp.substring(0, eindex + END_PAGE_TAG.length())); System.out.println("Found end of doc."); } else { // need to read ahead to look for end of page while (true) { read = fileIn.read(buf, 0, READ_AHEAD_SIZE); if (read == -1) { // no more to read System.out.println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead); System.out.println(temp); break; } bytesRead += read; // look for end temp = new String(buf, 0, read); eindex = temp.indexOf(END_PAGE_TAG); if (eindex > -1) { pages.append(temp.substring(0, eindex + END_PAGE_TAG.length())); break; } else { pages.append(temp); } } } break; } } fileIn.close(); articles = WikiModelProcessor.process(pages); }
Example 20
Source Project: big-c File: JobSplit.java License: Apache License 2.0 | 4 votes |
public TaskSplitMetaInfo(InputSplit split, long startOffset) throws InterruptedException, IOException { this(new TaskSplitIndex("", startOffset), split.getLocations(), split.getLength()); }