Java Code Examples for org.apache.hadoop.io.SequenceFile#Metadata
The following examples show how to use org.apache.hadoop.io.SequenceFile#Metadata.
They are drawn from open-source projects; the source file, originating project, and license are noted above each example.
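Before diving into the examples, a minimal sketch of the Metadata class itself may help. Metadata is an ordered map of Text keys to Text values stored in the SequenceFile header; the sketch below uses only the stock constructor and the set/get/getMetadata accessors, and the key and value strings are invented for illustration.

import java.util.TreeMap;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class MetadataSketch {
  public static void main(String[] args) {
    // Build a metadata map; entries are written into the file header
    // by SequenceFile.Writer and read back by SequenceFile.Reader.
    SequenceFile.Metadata metadata = new SequenceFile.Metadata();
    metadata.set(new Text("author"), new Text("example"));   // invented entry

    // Look up a single entry; returns null if the key is absent.
    Text author = metadata.get(new Text("author"));

    // Or inspect all entries at once.
    TreeMap<Text, Text> entries = metadata.getMetadata();
    System.out.println(author + " / " + entries);
  }
}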
Example 1
Source File: HDFSTransactionLogTest.java from phoenix-tephra with Apache License 2.0
private SequenceFile.Writer getSequenceFileWriter(Configuration configuration, FileSystem fs,
                                                  long timeInMillis, byte versionNumber) throws IOException {
  String snapshotDir = configuration.get(TxConstants.Manager.CFG_TX_SNAPSHOT_DIR);
  Path newLog = new Path(snapshotDir, LOG_FILE_PREFIX + timeInMillis);
  SequenceFile.Metadata metadata = new SequenceFile.Metadata();
  if (versionNumber > 1) {
    metadata.set(new Text(TxConstants.TransactionLog.VERSION_KEY),
                 new Text(Byte.toString(versionNumber)));
  }
  switch (versionNumber) {
    case 1:
    case 2:
      return SequenceFile.createWriter(fs, configuration, newLog, LongWritable.class,
                                       co.cask.tephra.persist.TransactionEdit.class,
                                       SequenceFile.CompressionType.NONE, null, null, metadata);
    default:
      return SequenceFile.createWriter(fs, configuration, newLog, LongWritable.class,
                                       TransactionEdit.class,
                                       SequenceFile.CompressionType.NONE, null, null, metadata);
  }
}
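The writer above embeds the version entry in the file header. As a hedged companion sketch (not part of the original test), the entry could be read back through the standard SequenceFile.Reader API, reusing fs, configuration, and newLog from the example:

// Sketch only: reads the version entry written by getSequenceFileWriter.
SequenceFile.Reader reader = new SequenceFile.Reader(fs, newLog, configuration);
try {
  SequenceFile.Metadata metadata = reader.getMetadata();
  Text version = metadata.get(new Text(TxConstants.TransactionLog.VERSION_KEY));
  // version is null for version-1 logs, which write no version entry.
} finally {
  reader.close();
}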
Example 2
Source File: ReadSequenceFileTest.java from kite with Apache License 2.0
/**
 * return a mapping of expected keys -> records
 */
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
  HashMap<String, Record> map = new HashMap<String, Record>();
  SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
  FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
        SequenceFile.CompressionType.NONE, null, metadata);
    for (int i = 0; i < numRecords; ++i) {
      Text key = new Text("key" + i);
      Text value = new Text("value" + i);
      writer.append(key, value);
      Record record = new Record();
      record.put("key", key);
      record.put("value", value);
      map.put(key.toString(), record);
    }
  } finally {
    Closeables.closeQuietly(writer);
  }
  return map;
}
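The getMetadataForSequenceFile() helper is not shown in this snippet. Since SequenceFile.Metadata has a constructor taking a TreeMap<Text, Text>, a hypothetical reconstruction might look like the following; the actual helper in ReadSequenceFileTest may differ, and the entry is invented:

// Hypothetical reconstruction of the elided helper.
private TreeMap<Text, Text> getMetadataForSequenceFile() {
  TreeMap<Text, Text> entries = new TreeMap<Text, Text>();
  entries.put(new Text("file_type"), new Text("test"));  // invented entry
  return entries;
}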
Example 3
Source File: ReadSequenceFileTest.java from kite with Apache License 2.0
/**
 * return a mapping of expected keys -> records
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
  HashMap<String, Record> map = new HashMap<String, Record>();
  SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
  FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
        ParseTextMyWritableBuilder.MyWritable.class,
        SequenceFile.CompressionType.NONE, null, metadata);
    for (int i = 0; i < numRecords; ++i) {
      Text key = new Text("key" + i);
      ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
      writer.append(key, value);
      Record record = new Record();
      record.put("key", key);
      record.put("value", value);
      map.put(key.toString(), record);
    }
  } finally {
    Closeables.closeQuietly(writer);
  }
  return map;
}
Example 4
Source File: HDFSTransactionLog.java from phoenix-tephra with Apache License 2.0
LogWriter(FileSystem fs, Configuration hConf, Path logPath) throws IOException {
  // TODO: retry a few times to ride over transient failures?
  SequenceFile.Metadata metadata = new SequenceFile.Metadata();
  metadata.set(new Text(TxConstants.TransactionLog.VERSION_KEY),
               new Text(Byte.toString(TxConstants.TransactionLog.CURRENT_VERSION)));
  this.internalWriter = SequenceFile.createWriter(fs, hConf, logPath, LongWritable.class,
      TransactionEdit.class, SequenceFile.CompressionType.NONE, null, null, metadata);
  LOG.debug("Created a new TransactionLog writer for " + logPath);
}
Example 5
Source File: ReadRCFileBuilder.java from kite with Apache License 2.0
@Override
protected boolean doProcess(Record record, InputStream in) throws IOException {
  Path attachmentPath = getAttachmentPath(record);
  SingleStreamFileSystem fs = new SingleStreamFileSystem(in, attachmentPath);
  RCFile.Reader reader = null;
  try {
    reader = new RCFile.Reader(fs, attachmentPath, conf);
    Record template = record.copy();
    removeAttachments(template);
    template.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
    if (includeMetaData) {
      SequenceFile.Metadata metadata = reader.getMetadata();
      if (metadata != null) {
        template.put(RC_FILE_META_DATA, metadata);
      }
    }
    switch (readMode) {
      case row:
        return readRowWise(reader, template);
      case column:
        return readColumnWise(reader, template);
      default:
        throw new IllegalStateException();
    }
  } catch (IOException e) {
    throw new MorphlineRuntimeException("IOException while processing attachment "
        + attachmentPath.getName(), e);
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
}
Example 6
Source File: ReadRCFileTest.java from kite with Apache License 2.0
private void createRCFile(final String fileName, final int numRecords,
                          final int maxColumns, boolean addNullValue) throws IOException {
  // Write the sequence file
  SequenceFile.Metadata metadata = getMetadataForRCFile();
  Configuration conf = new Configuration();
  conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
  Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
  RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null, metadata, null);
  for (int row = 0; row < numRecords; row++) {
    BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
    dataWrite.resetValid(maxColumns);
    for (int column = 0; column < maxColumns; column++) {
      Writable sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
      // Set the last column of the last row as null
      if (addNullValue && column == maxColumns - 1 && row == numRecords - 1) {
        sampleText = NullWritable.get();
      }
      ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
      sampleText.write(dataOutput);
      dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
    }
    rcFileWriter.append(dataWrite);
  }
  rcFileWriter.close();
}
Example 7
Source File: WriterBinaryBlock.java from systemds with Apache License 2.0
@SuppressWarnings("deprecation") protected final void writeBinaryBlockMatrixToSequenceFile( Path path, JobConf job, FileSystem fs, MatrixBlock src, int blen, int rl, int ru ) throws IOException { boolean sparse = src.isInSparseFormat(); int rlen = src.getNumRows(); int clen = src.getNumColumns(); // 1) create sequence file writer, with right replication factor // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication()) SequenceFile.Writer writer = null; if( _replication > 0 ) //if replication specified (otherwise default) { //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class, job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096), (short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata()); } else { writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class); } try { // 2) bound check for src block if( src.getNumRows() > rlen || src.getNumColumns() > clen ) { throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " + "out of overall matrix range [1:"+rlen+",1:"+clen+"]."); } //3) reblock and write MatrixIndexes indexes = new MatrixIndexes(); if( rlen <= blen && clen <= blen && rl == 0 ) //opt for single block { //directly write single block indexes.setIndexes(1, 1); writer.append(indexes, src); } else //general case { //initialize blocks for reuse (at most 4 different blocks required) MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros()); //create and write subblocks of matrix for(int blockRow = rl/blen; blockRow < (int)Math.ceil(ru/(double)blen); blockRow++) for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++) { int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen; int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen; int row_offset = blockRow*blen; int col_offset = blockCol*blen; //get reuse matrix block MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen); //copy submatrix to block src.slice( row_offset, row_offset+maxRow-1, col_offset, col_offset+maxCol-1, block ); //append block to sequence file indexes.setIndexes(blockRow+1, blockCol+1); writer.append(indexes, block); //reset block for later reuse block.reset(); } } } finally { IOUtilFunctions.closeSilently(writer); } }
Example 8
Source File: WriterBinaryBlock.java from systemds with Apache License 2.0
@SuppressWarnings("deprecation") protected final void writeDiagBinaryBlockMatrixToHDFS( Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen, int blen ) throws IOException, DMLRuntimeException { boolean sparse = src.isInSparseFormat(); // 1) create sequence file writer, with right replication factor // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication()) SequenceFile.Writer writer = null; if( _replication > 0 ) //if replication specified (otherwise default) { //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class, job.getInt(HDFSTool.IO_FILE_BUFFER_SIZE, 4096), (short)_replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata()); } else { writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class); } try { // 2) bound check for src block if( src.getNumRows() > rlen || src.getNumColumns() > clen ) { throw new IOException("Matrix block [1:"+src.getNumRows()+",1:"+src.getNumColumns()+"] " + "out of overall matrix range [1:"+rlen+",1:"+clen+"]."); } //3) reblock and write MatrixIndexes indexes = new MatrixIndexes(); if( rlen <= blen && clen <= blen ) //opt for single block { //directly write single block indexes.setIndexes(1, 1); writer.append(indexes, src); } else //general case { //initialize blocks for reuse (at most 4 different blocks required) MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, blen, sparse, src.getNonZeros()); MatrixBlock emptyBlock = new MatrixBlock(); //create and write subblocks of matrix for(int blockRow = 0; blockRow < (int)Math.ceil(src.getNumRows()/(double)blen); blockRow++) for(int blockCol = 0; blockCol < (int)Math.ceil(src.getNumColumns()/(double)blen); blockCol++) { int maxRow = (blockRow*blen + blen < src.getNumRows()) ? blen : src.getNumRows() - blockRow*blen; int maxCol = (blockCol*blen + blen < src.getNumColumns()) ? blen : src.getNumColumns() - blockCol*blen; MatrixBlock block = null; if( blockRow==blockCol ) //block on diagonal { int row_offset = blockRow*blen; int col_offset = blockCol*blen; //get reuse matrix block block = getMatrixBlockForReuse(blocks, maxRow, maxCol, blen); //copy submatrix to block src.slice( row_offset, row_offset+maxRow-1, col_offset, col_offset+maxCol-1, block ); } else //empty block (not on diagonal) { block = emptyBlock; block.reset(maxRow, maxCol); } //append block to sequence file indexes.setIndexes(blockRow+1, blockCol+1); writer.append(indexes, block); //reset block for later reuse if( blockRow!=blockCol ) block.reset(); } } } finally { IOUtilFunctions.closeSilently(writer); } }
Example 9
Source File: ReadSequenceFileBuilder.java from kite with Apache License 2.0
@Override
protected boolean doProcess(Record inputRecord, final InputStream in) throws IOException {
  SequenceFile.Metadata sequenceFileMetaData = null;
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.stream(new FSDataInputStream(new ForwardOnlySeekable(in))));
    if (includeMetaData) {
      sequenceFileMetaData = reader.getMetadata();
    }
    Class keyClass = reader.getKeyClass();
    Class valueClass = reader.getValueClass();
    Record template = inputRecord.copy();
    removeAttachments(template);
    while (true) {
      Writable key = (Writable) ReflectionUtils.newInstance(keyClass, conf);
      Writable val = (Writable) ReflectionUtils.newInstance(valueClass, conf);
      try {
        if (!reader.next(key, val)) {
          break;
        }
      } catch (EOFException ex) {
        // SequenceFile.Reader will throw an EOFException after reading
        // all the data, if it doesn't know the length. Since we are
        // passing in an InputStream, we hit this case.
        LOG.trace("Received expected EOFException", ex);
        break;
      }
      incrementNumRecords();
      Record outputRecord = template.copy();
      outputRecord.put(keyField, key);
      outputRecord.put(valueField, val);
      outputRecord.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
      if (includeMetaData && sequenceFileMetaData != null) {
        outputRecord.put(SEQUENCE_FILE_META_DATA, sequenceFileMetaData);
      }
      // pass record to next command in chain:
      if (!getChild().process(outputRecord)) {
        return false;
      }
    }
  } finally {
    Closeables.closeQuietly(reader);
  }
  return true;
}
Example 10
Source File: ReadRCFileTest.java from kite with Apache License 2.0
private SequenceFile.Metadata getMetadataForRCFile() {
  return RCFile.createMetadata(new Text("metaField"), new Text("metaValue"));
}
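RCFile.createMetadata is a thin convenience over the plain Metadata API: it pairs its Text arguments as alternating keys and values. Assuming that contract, the call above should be equivalent to this manual construction:

// Equivalent manual construction of the same metadata.
SequenceFile.Metadata metadata = new SequenceFile.Metadata();
metadata.set(new Text("metaField"), new Text("metaValue"));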