/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.tugraz.sysds.runtime.controlprogram.parfor;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.tugraz.sysds.conf.ConfigurationManager;
import org.tugraz.sysds.runtime.DMLRuntimeException;
import org.tugraz.sysds.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.tugraz.sysds.runtime.controlprogram.ParForProgramBlock.PartitionFormat;
import org.tugraz.sysds.runtime.controlprogram.caching.MatrixObject;
import org.tugraz.sysds.runtime.controlprogram.parfor.util.Cell;
import org.tugraz.sysds.runtime.controlprogram.parfor.util.IDSequence;
import org.tugraz.sysds.runtime.controlprogram.parfor.util.StagingFileUtils;
import org.tugraz.sysds.runtime.io.IOUtilFunctions;
import org.tugraz.sysds.runtime.matrix.data.IJV;
import org.tugraz.sysds.runtime.matrix.data.InputInfo;
import org.tugraz.sysds.runtime.matrix.data.MatrixBlock;
import org.tugraz.sysds.runtime.matrix.data.MatrixCell;
import org.tugraz.sysds.runtime.matrix.data.MatrixIndexes;
import org.tugraz.sysds.runtime.matrix.data.OutputInfo;
import org.tugraz.sysds.runtime.util.FastStringTokenizer;
import org.tugraz.sysds.runtime.util.LocalFileUtils;

/**
 * Partitions a given matrix into row or column partitions with a two-pass approach.
 * In the first pass, the input matrix is read from HDFS and sorted into block partitions
 * in a staging area in the local file system, according to the partition format.
 * In order to allow for scalable partitioning, we process one block at a time.
 * In the second pass, all blocks of a partition are appended to a sequence file on HDFS.
 * Block-wise partitioning and the write-once semantics of sequence files require this
 * indirection over the local staging area. For scalable computation, we process one
 * sequence file at a time.
 *
 * NOTE: For the resulting partitioned matrix, we store block and cell indexes relative
 * to partition boundaries. This means that the partitioned matrix CANNOT be read as a
 * traditional matrix because there are, for example, multiple blocks with the same index
 * (the actual index is encoded in the path). In order to enable a full read of partitioned
 * matrices, the data converter would need to handle negative row/col offsets for
 * partitioned reads. This is currently not done in order to avoid overhead for normal
 * reads, and because partitioning is only applied for exclusively indexed access.
 */
public class DataPartitionerLocal extends DataPartitioner
{
	private static final boolean PARALLEL = true;
	
	private IDSequence _seq = null;
	private MatrixBlock _reuseBlk = null;
	
	private int _par = -1;
	
	/**
	 * DataPartitionerLocal constructor.
	 * 
	 * @param dpf data partition format
	 * @param par -1 for serial, otherwise number of threads; can be ignored by the implementation
	 */
	public DataPartitionerLocal(PartitionFormat dpf, int par) {
		super(dpf._dpf, dpf._N);
		if( dpf.isBlockwise() )
			throw new DMLRuntimeException("Data partitioning format '"+dpf+"' not supported by DataPartitionerLocal");
		_seq = new IDSequence();
		_par = (par > 0) ? par : 1;
	}

	@Override
	protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen, long clen, int blen)
	{
		//force writing to disk (typically not required since partitioning only applied if dataset exceeds CP size)
		in.exportData(); //written to disk iff dirty
		
		String fname = in.getFileName();
		String fnameStaging = LocalFileUtils.getUniqueWorkingDir( LocalFileUtils.CATEGORY_PARTITIONING );
		
		//reblock input matrix
		if( ii == InputInfo.TextCellInputInfo )
			partitionTextCell( fname, fnameStaging, fnameNew, rlen, clen, blen );
		else if( ii == InputInfo.BinaryCellInputInfo )
			partitionBinaryCell( fname, fnameStaging, fnameNew, rlen, clen, blen );
		else if( ii == InputInfo.BinaryBlockInputInfo ) {
			if( oi == OutputInfo.BinaryBlockOutputInfo )
				partitionBinaryBlock( fname, fnameStaging, fnameNew, rlen, clen, blen );
			else if ( oi == OutputInfo.BinaryCellOutputInfo )
				partitionBinaryBlock2BinaryCell( fname, fnameStaging, fnameNew, rlen, clen, blen );
		}
		else
			throw new DMLRuntimeException("Cannot create data partitions of format: "+ii.toString());
		
		LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
	}

	private void partitionTextCell( String fname, String fnameStaging, String fnameNew, long rlen, long clen, int blen )
	{
		long row = -1;
		long col = -1;
		
		try
		{
			//STEP 1: read matrix from HDFS and write blocks to local staging area
			//check and add input path
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
			Path path = new Path(fname);
			FileInputFormat.addInputPath(job, path);
			TextInputFormat informat = new TextInputFormat();
			informat.configure(job);
			InputSplit[] splits = informat.getSplits(job, 1);
			
			LinkedList<Cell> buffer = new LinkedList<>();
			LongWritable key = new LongWritable();
			Text value = new Text();
			FastStringTokenizer st = new FastStringTokenizer(' ');
			
			for(InputSplit split: splits)
			{
				RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
				try
				{
					while(reader.next(key, value)) {
						st.reset( value.toString() ); //reset tokenizer
						row = st.nextLong();
						col = st.nextLong();
						double lvalue = st.nextDouble();
						Cell tmp = new Cell( row, col, lvalue );
						
						buffer.addLast( tmp );
						if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE ) { //periodic flush
							appendCellBufferToStagingArea(fnameStaging, buffer, blen);
							buffer.clear();
						}
					}
					
					//final flush
					if( !buffer.isEmpty() ) {
						appendCellBufferToStagingArea(fnameStaging, buffer, blen);
						buffer.clear();
					}
				}
				finally {
					IOUtilFunctions.closeSilently(reader);
				}
			}

			//STEP 2: read matrix blocks from staging area and write matrix to HDFS
			String[] fnamesPartitions = new File(fnameStaging).list();
			if(PARALLEL)
			{
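				//NOTE: each worker thread writes a contiguous range of staging
				//directories; ranges are computed with ceiling division, so the
				//last thread may receive fewer (possibly zero) partitions.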
				int len = Math.min(fnamesPartitions.length, _par);
				Thread[] threads = new Thread[len];
				for( int i=0; i<len; i++ ) {
					int start = i*(int)Math.ceil(((double)fnamesPartitions.length)/len);
					int end = (i+1)*(int)Math.ceil(((double)fnamesPartitions.length)/len)-1;
					end = Math.min(end, fnamesPartitions.length-1);
					threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
					threads[i].start();
				}
				
				for( Thread t : threads )
					t.join();
			}
			else {
				for( String pdir : fnamesPartitions )
					writeTextCellFileToHDFS( job, fnameNew, fnameStaging+"/"+pdir );
			}
		}
		catch (Exception e) {
			//post-mortem error handling and bounds checking
			if( row < 1 || row > rlen || col < 1 || col > clen ) {
				throw new DMLRuntimeException("Matrix cell ["+(row)+","+(col)+"] " +
					"out of overall matrix range [1:"+rlen+",1:"+clen+"].");
			}
			else
				throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
		}
	}

	@SuppressWarnings("deprecation")
	private void partitionBinaryCell( String fname, String fnameStaging, String fnameNew, long rlen, long clen, int blen )
	{
		long row = -1;
		long col = -1;
		
		//STEP 1: read matrix from HDFS and write blocks to local staging area
		//check and add input path
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path(fname);
		
		try (FileSystem fs = IOUtilFunctions.getFileSystem(path, job))
		{
			//prepare sequence file reader, and write to local staging area
			LinkedList<Cell> buffer = new LinkedList<>();
			MatrixIndexes key = new MatrixIndexes();
			MatrixCell value = new MatrixCell();
			
			for( Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path) ) {
				try (SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job))
				{
					while(reader.next(key, value)) {
						row = key.getRowIndex();
						col = key.getColumnIndex();
						Cell tmp = new Cell( row, col, value.getValue() );
						
						buffer.addLast( tmp );
						if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE ) { //periodic flush
							appendCellBufferToStagingArea(fnameStaging, buffer, blen);
							buffer.clear();
						}
					}
					
					//final flush
					if( !buffer.isEmpty() ) {
						appendCellBufferToStagingArea(fnameStaging, buffer, blen);
						buffer.clear();
					}
				}
			}

			//STEP 2: read matrix blocks from staging area and write matrix to HDFS
			String[] fnamesPartitions = new File(fnameStaging).list();
			if(PARALLEL)
			{
				int len = Math.min(fnamesPartitions.length, _par);
				Thread[] threads = new Thread[len];
				for( int i=0; i<len; i++ ) {
					int start = i*(int)Math.ceil(((double)fnamesPartitions.length)/len);
					int end = (i+1)*(int)Math.ceil(((double)fnamesPartitions.length)/len)-1;
					end = Math.min(end, fnamesPartitions.length-1);
					threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
					threads[i].start();
				}
				
				for( Thread t : threads )
					t.join();
			}
			else {
				for( String pdir : fnamesPartitions )
					writeBinaryCellSequenceFileToHDFS( job, fnameNew, fnameStaging+"/"+pdir );
			}
		}
		catch (Exception e) {
			//post-mortem error handling and bounds checking
			if( row < 1 || row > rlen || col < 1 || col > clen ) {
				throw new DMLRuntimeException("Matrix cell ["+(row)+","+(col)+"] " +
					"out of overall matrix range [1:"+rlen+",1:"+clen+"].");
			}
			else
				throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
		}
	}
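	//Reads a binary-block matrix from HDFS, slices each block into partition
	//fragments in the local staging area, and writes one sequence file per
	//partition back to HDFS.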
	@SuppressWarnings("deprecation")
	private void partitionBinaryBlock( String fname, String fnameStaging, String fnameNew, long rlen, long clen, int blen )
	{
		//STEP 1: read matrix from HDFS and write blocks to local staging area
		//check and add input path
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path(fname);
		
		try(FileSystem fs = IOUtilFunctions.getFileSystem(path, job))
		{
			//create reuse object
			_reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, blen, blen);
			
			//prepare sequence file reader, and write to local staging area
			MatrixIndexes key = new MatrixIndexes();
			MatrixBlock value = new MatrixBlock();
			
			for(Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path) ) {
				try(SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job))
				{
					while(reader.next(key, value)) //for each block
					{
						long row_offset = (key.getRowIndex()-1)*blen;
						long col_offset = (key.getColumnIndex()-1)*blen;
						long rows = value.getNumRows();
						long cols = value.getNumColumns();
						
						//bound check per block
						if( row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen ) {
							throw new IOException("Matrix block ["+(row_offset+1)+":"+(row_offset+rows)+","+(col_offset+1)+":"+(col_offset+cols)+"] " +
								"out of overall matrix range [1:"+rlen+",1:"+clen+"].");
						}
						
						appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, blen);
					}
				}
			}

			//STEP 2: read matrix blocks from staging area and write matrix to HDFS
			String[] fnamesPartitions = new File(fnameStaging).list();
			if(PARALLEL)
			{
				int len = Math.min(fnamesPartitions.length, _par);
				Thread[] threads = new Thread[len];
				for( int i=0; i<len; i++ ) {
					int start = i*(int)Math.ceil(((double)fnamesPartitions.length)/len);
					int end = (i+1)*(int)Math.ceil(((double)fnamesPartitions.length)/len)-1;
					end = Math.min(end, fnamesPartitions.length-1);
					threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
					threads[i].start();
				}
				
				for( Thread t : threads )
					t.join();
			}
			else {
				for( String pdir : fnamesPartitions )
					writeBinaryBlockSequenceFileToHDFS( job, fnameNew, fnameStaging+"/"+pdir, false );
			}
		}
		catch (Exception e) {
			throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
		}
	}

	@SuppressWarnings({ "deprecation", "resource" })
	private void partitionBinaryBlock2BinaryCell( String fname, String fnameStaging, String fnameNew, long rlen, long clen, int blen )
	{
		try
		{
			//STEP 1: read matrix from HDFS and write blocks to local staging area
			//check and add input path
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
			Path path = new Path(fname);
			FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
			
			//prepare sequence file reader, and write to local staging area
			MatrixIndexes key = new MatrixIndexes();
			MatrixBlock value = new MatrixBlock();
			LinkedList<Cell> buffer = new LinkedList<>();
			
			for(Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path) ) {
				try(SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job))
				{
					while(reader.next(key, value)) //for each block
					{
						long row_offset = (key.getRowIndex()-1)*blen;
						long col_offset = (key.getColumnIndex()-1)*blen;
						long rows = value.getNumRows();
						long cols = value.getNumColumns();
						
						//bound check per block
						if( row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen ) {
							throw new IOException("Matrix block ["+(row_offset+1)+":"+(row_offset+rows)+","+(col_offset+1)+":"+(col_offset+cols)+"] " +
								"out of overall matrix range [1:"+rlen+",1:"+clen+"].");
						}
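						//convert the block to cells: sparse blocks are traversed
						//via their iterator, dense blocks by scanning all entries
						//and skipping zeros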
						boolean sparse = value.isInSparseFormat();
						if( sparse ) //SPARSE
						{
							Iterator<IJV> iter = value.getSparseBlockIterator();
							while( iter.hasNext() ) {
								IJV lcell = iter.next();
								Cell tmp = new Cell( row_offset + lcell.getI() + 1,
									col_offset + lcell.getJ() + 1, lcell.getV() );
								buffer.addLast( tmp );
							}
						}
						else //DENSE
						{
							for( int i=0; i<rows; i++ )
								for( int j=0; j<cols; j++ ) {
									double lvalue = value.getValueDenseUnsafe(i, j);
									if( lvalue != 0 ) { //for nnz
										Cell tmp = new Cell( row_offset + i + 1, col_offset + j + 1, lvalue );
										buffer.addLast( tmp );
									}
								}
						}
						
						appendCellBufferToStagingArea(fnameStaging, buffer, blen);
						buffer.clear();
					}
				}
			}

			//STEP 2: read matrix blocks from staging area and write matrix to HDFS
			String[] fnamesPartitions = new File(fnameStaging).list();
			if(PARALLEL)
			{
				int len = Math.min(fnamesPartitions.length, _par);
				Thread[] threads = new Thread[len];
				for( int i=0; i<len; i++ ) {
					int start = i*(int)Math.ceil(((double)fnamesPartitions.length)/len);
					int end = (i+1)*(int)Math.ceil(((double)fnamesPartitions.length)/len)-1;
					end = Math.min(end, fnamesPartitions.length-1);
					threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
					threads[i].start();
				}
				
				for( Thread t : threads )
					t.join();
			}
			else {
				for( String pdir : fnamesPartitions )
					writeBinaryCellSequenceFileToHDFS( job, fnameNew, fnameStaging+"/"+pdir );
			}
		}
		catch (Exception e) {
			throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
		}
	}

	private void appendBlockToStagingArea( String dir, MatrixBlock mb, long row_offset, long col_offset, long blen ) 
		throws IOException
	{
		//NOTE: for temporary block we always create dense representations
		boolean sparse = mb.isInSparseFormat();
		long nnz = mb.getNonZeros();
		long rows = mb.getNumRows();
		long cols = mb.getNumColumns();
		double sparsity = ((double)nnz)/(rows*cols);
		
		if( _format == PDataPartitionFormat.ROW_WISE ) {
			_reuseBlk.reset( 1, (int)cols, sparse, (int)(cols*sparsity) );
			for( int i=0; i<rows; i++ ) {
				String pdir = LocalFileUtils.checkAndCreateStagingDir(dir+"/"+(row_offset+1+i));
				String pfname = pdir+"/"+"block_"+(col_offset/blen+1);
				mb.slice(i, i, 0, (int)(cols-1), _reuseBlk);
				LocalFileUtils.writeMatrixBlockToLocal(pfname, _reuseBlk);
				_reuseBlk.reset();
			}
		}
		else if( _format == PDataPartitionFormat.ROW_BLOCK_WISE ) {
			String pdir = LocalFileUtils.checkAndCreateStagingDir(dir+"/"+(row_offset/blen+1));
			String pfname = pdir+"/"+"block_"+(col_offset/blen+1);
			LocalFileUtils.writeMatrixBlockToLocal(pfname, mb);
		}
		else if( _format == PDataPartitionFormat.COLUMN_WISE ) {
			//create object for reuse
			_reuseBlk.reset( (int)rows, 1, false );
			for( int i=0; i<cols; i++ ) {
				String pdir = LocalFileUtils.checkAndCreateStagingDir(dir+"/"+(col_offset+1+i));
				String pfname = pdir+"/"+"block_"+(row_offset/blen+1);
				mb.slice(0, (int)(rows-1), i, i, _reuseBlk);
				LocalFileUtils.writeMatrixBlockToLocal(pfname, _reuseBlk);
				_reuseBlk.reset();
			}
		}
		else if( _format == PDataPartitionFormat.COLUMN_BLOCK_WISE ) {
			String pdir = LocalFileUtils.checkAndCreateStagingDir(dir+"/"+(col_offset/blen+1));
			String pfname = pdir+"/"+"block_"+(row_offset/blen+1);
			LocalFileUtils.writeMatrixBlockToLocal(pfname, mb);
		}
	}

	private void appendCellBufferToStagingArea( String dir, LinkedList<Cell> buffer, int blen ) 
		throws IOException
	{
		HashMap<Long,LinkedList<Cell>> sortedBuffer = new HashMap<>();
		
		//sort cells in buffer wrt key
		long key = -1;
		for( Cell c : buffer )
		{
			switch(_format)
			{
				case ROW_WISE:
					key = c.getRow();
					c.setRow(1);
					break;
				case ROW_BLOCK_WISE:
					key = (c.getRow()-1)/blen+1;
					c.setRow((c.getRow()-1)%blen+1);
					break;
				case COLUMN_WISE:
					key = c.getCol();
					c.setCol(1);
					break;
				case COLUMN_BLOCK_WISE:
					key = (c.getCol()-1)/blen+1;
					c.setCol((c.getCol()-1)%blen+1);
					break;
				default:
					//do nothing
			}
			
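			//the cell is now indexed relative to its partition; group cells by
			//partition key so that each list lands in its own staging directory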
			if( !sortedBuffer.containsKey(key) )
				sortedBuffer.put(key, new LinkedList<Cell>());
			sortedBuffer.get(key).addLast(c);
		}
		
		//write lists of cells to local files
		for( Entry<Long,LinkedList<Cell>> e : sortedBuffer.entrySet() ) {
			String pdir = LocalFileUtils.checkAndCreateStagingDir(dir+"/"+e.getKey());
			String pfname = pdir+"/"+"block_"+_seq.getNextID();
			StagingFileUtils.writeCellListToLocal(pfname, e.getValue());
		}
	}

	/////////////////////////////////////
	// Helper methods for HDFS         //
	// read/write in different formats //
	/////////////////////////////////////
	
	@SuppressWarnings({ "deprecation", "resource" })
	public void writeBinaryBlockSequenceFileToHDFS( JobConf job, String dir, String lpdir, boolean threadsafe ) 
		throws IOException
	{
		long key = getKeyFromFilePath(lpdir);
		Path path = new Path(dir+"/"+key);
		
		SequenceFile.Writer writer = null;
		try {
			FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
			writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class); //beware: ca. 50ms
			String[] fnameBlocks = new File( lpdir ).list();
			for( String fnameBlock : fnameBlocks ) {
				long key2 = getKey2FromFileName(fnameBlock);
				MatrixBlock tmp = null;
				if( threadsafe )
					tmp = LocalFileUtils.readMatrixBlockFromLocal(lpdir+"/"+fnameBlock);
				else
					tmp = LocalFileUtils.readMatrixBlockFromLocal(lpdir+"/"+fnameBlock, _reuseBlk);
				
				if( _format == PDataPartitionFormat.ROW_WISE || _format == PDataPartitionFormat.ROW_BLOCK_WISE ) {
					writer.append(new MatrixIndexes(1,key2), tmp);
				}
				else if( _format == PDataPartitionFormat.COLUMN_WISE || _format == PDataPartitionFormat.COLUMN_BLOCK_WISE ) {
					writer.append(new MatrixIndexes(key2,1), tmp);
				}
			}
		}
		finally {
			IOUtilFunctions.closeSilently(writer);
		}
	}

	@SuppressWarnings({ "deprecation", "resource" })
	public void writeBinaryCellSequenceFileToHDFS( JobConf job, String dir, String lpdir ) 
		throws IOException
	{
		long key = getKeyFromFilePath(lpdir);
		Path path = new Path(dir+"/"+key);
		
		SequenceFile.Writer writer = null;
		try {
			FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
			writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware: ca. 50ms
			MatrixIndexes indexes = new MatrixIndexes();
			MatrixCell cell = new MatrixCell();
			
			String[] fnameBlocks = new File( lpdir ).list();
			for( String fnameBlock : fnameBlocks ) {
				LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir+"/"+fnameBlock);
				for( Cell c : tmp ) {
					indexes.setIndexes(c.getRow(), c.getCol());
					cell.setValue(c.getValue());
					writer.append(indexes, cell);
				}
			}
		}
		finally {
			IOUtilFunctions.closeSilently(writer);
		}
	}

	@SuppressWarnings("resource")
	public void writeTextCellFileToHDFS( JobConf job, String dir, String lpdir ) 
		throws IOException
	{
		long key = getKeyFromFilePath(lpdir);
		Path path = new Path(dir+"/"+key);
		FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
		try(BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path,true))))
		{
			//for obj reuse and preventing repeated buffer re-allocations
			StringBuilder sb = new StringBuilder();
			
			String[] fnameBlocks = new File( lpdir ).list();
			for( String fnameBlock : fnameBlocks ) {
				LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir+"/"+fnameBlock);
				for( Cell c : tmp ) {
					sb.append(c.getRow());
					sb.append(' ');
					sb.append(c.getCol());
					sb.append(' ');
					sb.append(c.getValue());
					sb.append('\n');
					out.write( sb.toString() );
					sb.setLength(0);
				}
			}
		}
	}

	/////////////////////////////////
	// Helper methods for local fs //
	// read/write                  //
	/////////////////////////////////
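	//the partition key (row, column, or block index) is encoded as the last
	//path component of the staging directories created in appendBlockToStagingArea
	//and appendCellBufferToStagingArea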
	private static long getKeyFromFilePath( String dir ) {
		String[] dirparts = dir.split("/");
		long key = Long.parseLong( dirparts[dirparts.length-1] );
		return key;
	}

	private static long getKey2FromFileName( String fname ) {
		return Long.parseLong( fname.split("_")[1] );
	}

	private abstract class DataPartitionerWorker implements Runnable
	{
		private JobConf _job = null;
		private String _fnameNew = null;
		private String _fnameStaging = null;
		private String[] _fnamesPartitions = null;
		private int _start = -1;
		private int _end = -1;
		
		public DataPartitionerWorker(JobConf job, String fnameNew, String fnameStaging, String[] fnamesPartitions, int start, int end) {
			_job = job;
			_fnameNew = fnameNew;
			_fnameStaging = fnameStaging;
			_fnamesPartitions = fnamesPartitions;
			_start = start;
			_end = end;
		}
		
		@Override
		public void run() {
			//read each input if required
			try {
				for( int i=_start; i<=_end; i++ ) {
					String pdir = _fnamesPartitions[i];
					writeFileToHDFS( _job, _fnameNew, _fnameStaging+"/"+pdir );
				}
			}
			catch(Exception ex) {
				throw new RuntimeException("Failed on parallel data partitioning.", ex);
			}
		}
		
		public abstract void writeFileToHDFS( JobConf job, String fnameNew, String stagingDir ) 
			throws IOException;
	}

	private class DataPartitionerWorkerTextCell extends DataPartitionerWorker
	{
		public DataPartitionerWorkerTextCell(JobConf job, String fnameNew, String fnameStaging, String[] fnamesPartitions, int start, int end) {
			super(job, fnameNew, fnameStaging, fnamesPartitions, start, end);
		}
		
		@Override
		public void writeFileToHDFS(JobConf job, String fnameNew, String stagingDir) throws IOException {
			writeTextCellFileToHDFS( job, fnameNew, stagingDir );
		}
	}

	private class DataPartitionerWorkerBinaryCell extends DataPartitionerWorker
	{
		public DataPartitionerWorkerBinaryCell(JobConf job, String fnameNew, String fnameStaging, String[] fnamesPartitions, int start, int end) {
			super(job, fnameNew, fnameStaging, fnamesPartitions, start, end);
		}
		
		@Override
		public void writeFileToHDFS(JobConf job, String fnameNew, String stagingDir) throws IOException {
			writeBinaryCellSequenceFileToHDFS( job, fnameNew, stagingDir );
		}
	}

	private class DataPartitionerWorkerBinaryBlock extends DataPartitionerWorker
	{
		public DataPartitionerWorkerBinaryBlock(JobConf job, String fnameNew, String fnameStaging, String[] fnamesPartitions, int start, int end) {
			super(job, fnameNew, fnameStaging, fnamesPartitions, start, end);
		}
		
		@Override
		public void writeFileToHDFS(JobConf job, String fnameNew, String stagingDir) throws IOException {
			writeBinaryBlockSequenceFileToHDFS( job, fnameNew, stagingDir, true );
		}
	}
}