org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl. Each example is taken from the project and source file named above it.
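All of the examples share one basic pattern: a TaskAttemptContextImpl is built from a Configuration and a TaskAttemptID so that InputFormats, RecordReaders, OutputFormats, and OutputCommitters can be exercised outside of a running MapReduce job, typically in tests. The minimal sketch below illustrates that pattern for the read path; the input path and the choice of TextInputFormat are placeholder assumptions, not taken from any particular example that follows.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextImplSketch {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    // Placeholder input path; point it at any local text file.
    FileInputFormat.setInputPaths(job, new Path("/tmp/input.txt"));

    // Compute input splits the same way the examples below do.
    TextInputFormat format = new TextInputFormat();
    List<InputSplit> splits = format.getSplits(job);

    // A default TaskAttemptID is sufficient when no real task attempt exists.
    TaskAttemptContext context =
        new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    // Drive the RecordReader manually, outside of a MapReduce task.
    RecordReader<LongWritable, Text> reader =
        format.createRecordReader(splits.get(0), context);
    reader.initialize(splits.get(0), context);
    while (reader.nextKeyValue()) {
      System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
    }
    reader.close();
  }
}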
Example #1
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatGenesisBlock() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="ethgenesis.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for genesis block");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for genesis block contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 0, block.getEthereumTransactions().size(),"Genesis Block must have 0 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in genesis Block");
	reader.close();
}
 
Example #2
Source File: MROutputCommitter.java    From incubator-tez with Apache License 2.0
@Override
public void recoverTask(int taskIndex, int attemptId) throws IOException {
  if (!initialized) {
    throw new RuntimeException("Committer not initialized");
  }
  TaskAttemptID taskAttemptID = new TaskAttemptID(
      Long.toString(context.getApplicationId().getClusterTimestamp())
      + String.valueOf(context.getVertexIndex()),
      context.getApplicationId().getId(),
      ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ?
          TaskType.MAP : TaskType.REDUCE)),
      taskIndex, attemptId);
  TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf,
      taskAttemptID);
  committer.recoverTask(taskContext);
}
 
Example #3
Source File: PentahoTwitterInputFormat.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public IPentahoRecordReader createRecordReader( IPentahoInputSplit split ) throws Exception {
  return inClassloader( () -> {
    PentahoInputSplitImpl pentahoInputSplit = (PentahoInputSplitImpl) split;
    InputSplit inputSplit = pentahoInputSplit.getInputSplit();

    ReadSupport<RowMetaAndData> readSupport = new PentahoParquetReadSupport();

    ParquetRecordReader<RowMetaAndData> nativeRecordReader =
      new ParquetRecordReader<>( readSupport, ParquetInputFormat.getFilter( job
        .getConfiguration() ) );
    TaskAttemptContextImpl task = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID() );
    nativeRecordReader.initialize( inputSplit, task );

    return new PentahoParquetRecordReader( nativeRecordReader );
  } );
}
 
Example #4
Source File: HFileAppender.java    From tajo with Apache License 2.0
@Override
public void init() throws IOException {
  super.init();

  Configuration taskConf = new Configuration();
  Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
  taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

  ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
  writerContext = new TaskAttemptContextImpl(taskConf,
      new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
          taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

  HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
  try {
    writer = hFileOutputFormat2.getRecordWriter(writerContext);

    committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
    workingFilePath = committer.getWorkPath();
  } catch (InterruptedException e) {
    throw new IOException(e.getMessage(), e);
  }

  LOG.info("Created hbase file writer: " + workingFilePath);
}
 
Example #5
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint()
		throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2003encrypt.xls";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "de");

	// low footprint
	conf.set("hadoopoffice.read.lowFootprint", "true");
	// for decryption simply set the password
	conf.set("hadoopoffice.read.security.crypt.password", "test2");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);

	InterruptedException ex = assertThrows(InterruptedException.class,
			() -> reader.initialize(splits.get(0), context), "Exception is thrown in case of wrong password");
}
 
Example #6
Source File: TestMRCJCFileOutputCommitter.java    From big-c with Apache License 2.0
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #7
Source File: MROutputCommitter.java    From tez with Apache License 2.0
@Override
public void recoverTask(int taskIndex, int attemptId) throws IOException {
  if (!initialized) {
    throw new RuntimeException("Committer not initialized");
  }
  TaskAttemptID taskAttemptID = new TaskAttemptID(
      Long.toString(getContext().getApplicationId().getClusterTimestamp())
      + String.valueOf(getContext().getVertexIndex()),
      getContext().getApplicationId().getId(),
      ((jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ?
          TaskType.MAP : TaskType.REDUCE)),
      taskIndex, attemptId);
  TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf,
      taskAttemptID);
  committer.recoverTask(taskContext);
}
 
Example #8
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinRawBlockInputFormatBlockVersion1() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version1.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 1");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(482, block.getLength(), "Random block version 1 must have size of 482 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in block version 1");
  reader.close();
}
 
Example #9
Source File: ContentIndexingColumnBasedHandlerTest.java    From datawave with Apache License 2.0
@Before
public void setUp() throws Exception {
    
    conf = new Configuration();
    conf.addResource("config/all-config.xml");
    ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ctx.getConfiguration().setInt(ContentIndexingColumnBasedHandler.NUM_SHARDS, 131);
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_TNAME, "shard");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GRIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(TypeRegistry.INGEST_DATA_TYPES, "test");
    ctx.getConfiguration().set("data.name", "test");
    ctx.getConfiguration().set("test.data.auth.id.mode", "NEVER");
    ctx.getConfiguration().set("test" + BaseIngestHelper.DEFAULT_TYPE, LcNoDiacriticsType.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.HANDLER_CLASSES, TestContentIndexingColumnBasedHandler.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.RAW_READER, TestEventRecordReader.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.INGEST_HELPER, TestContentBaseIngestHelper.class.getName());
    ctx.getConfiguration().set(TypeRegistry.EXCLUDED_HANDLER_CLASSES, "FAKE_HANDLER_CLASS"); // it will die if this field is not faked
    
    helper = new TestContentBaseIngestHelper();
    colVis = new ColumnVisibility("");
}
 
Example #10
Source File: DateIndexDataTypeHandlerTest.java    From datawave with Apache License 2.0
@Before
public void setup() throws Exception {
    
    conf = new Configuration();
    conf.set("num.shards", "11");
    conf.set("data.name", "testdatatype");
    conf.set("testdatatype.ingest.helper.class", TestBaseIngestHelper.class.getName());
    conf.set("testdatatype.handler.classes", DateIndexDataTypeHandler.class.getName());
    
    // date index configuration
    conf.set("date.index.table.name", TableName.DATE_INDEX);
    conf.set("date.index.table.loader.priority", "30");
    conf.set("DateIndex.table.config.class", DateIndexTableConfigHelper.class.getName());
    conf.set("date.index.table.locality.groups", "activity:ACTIVITY,loaded:LOADED");
    
    // some date index configuration for our "testdatatype" events
    conf.set("testdatatype.date.index.type.to.field.map", "ACTIVITY=ACTIVITY_DATE,LOADED=LOAD_DATE");
    conf.set("all" + Properties.INGEST_POLICY_ENFORCER_CLASS, IngestPolicyEnforcer.NoOpIngestPolicyEnforcer.class.getName());
    
    TypeRegistry.reset();
    TypeRegistry.getInstance(conf);
    
    handler = new DateIndexDataTypeHandler<>();
    handler.setup(new TaskAttemptContextImpl(conf, new TaskAttemptID()));
}
 
Example #11
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinRawBlockInputFormatReqSeekBlockVersion1() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "reqseekversion1.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block requiring seek version 1");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(482, block.getLength(), "Random block requiring seek version 1 must have a size of 482 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in block requiring seek version 1");
  reader.close();
}
 
Example #12
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinBlockInputFormatBlockVersion4() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 4");
  RecordReader<BytesWritable, BitcoinBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BitcoinBlock block = new BitcoinBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(936, block.getTransactions().size(), "Random block version 4 must contain exactly 936 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block version 4");
  reader.close();
}
 
Example #13
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1346406GzipCompressed() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1346406.bin.gz";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for block 1346406");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for block 1346406 contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 6, block.getEthereumTransactions().size(),"Block 1346406 must have 6 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in block 1346406");
	reader.close();
}
 
Example #14
Source File: JsonTestFileLoader.java    From datawave with Apache License 2.0
@Override
public void loadTestData(SequenceFile.Writer seqFile) throws IOException {
    TypeRegistry.reset();
    TypeRegistry.getInstance(this.conf);
    Path path = new Path(this.uri);
    File file = new File(this.uri);
    FileSplit split = new FileSplit(path, 0, file.length(), null);
    TaskAttemptContext ctx = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
    
    try (JsonRecordReader reader = new JsonRecordReader()) {
        reader.initialize(split, ctx);
        while (reader.nextKeyValue()) {
            RawRecordContainer raw = reader.getEvent();
            seqFile.append(new Text(), raw);
        }
    }
}
 
Example #15
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinTransactionInputFormatBlockVersion3() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version3.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 3");
  RecordReader<BytesWritable, BitcoinTransaction> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(1645, transactCount, "Block version 3 must contain exactly 1645 transactions");
  reader.close();
}
 
Example #16
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinBlockInputFormatGenesisBlock() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "genesis.blk";
  String fileNameGenesis = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameGenesis);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for genesis block");
  RecordReader<BytesWritable, BitcoinBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable genesisKey = new BytesWritable();
  BitcoinBlock genesisBlock = new BitcoinBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for genesis block contains at least one block");
  genesisBlock = reader.getCurrentValue();
  assertEquals(1, genesisBlock.getTransactions().size(), "Genesis Block must contain exactly one transaction");
  assertFalse(reader.nextKeyValue(), "No further blocks in genesis Block");
  reader.close();
}
 
Example #17
Source File: StatementPatternStorageTest.java    From rya with Apache License 2.0
protected List<StatementPatternStorage> createStorages(final String location) throws IOException, InterruptedException {
    final List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>();
    StatementPatternStorage storage = new StatementPatternStorage();
    final InputFormat<?, ?> inputFormat = storage.getInputFormat();
    Job job = Job.getInstance(new Configuration());
    storage.setLocation(location, job);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);

    for (final InputSplit inputSplit : splits) {
        storage = new StatementPatternStorage();
        job = Job.getInstance(new Configuration());
        storage.setLocation(location, job);
        final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jtid", 0, TaskType.REDUCE, 0, 0));
        final RecordReader<?, ?> recordReader = inputFormat.createRecordReader(inputSplit,
                taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);

        storage.prepareToRead(recordReader, null);
        storages.add(storage);
    }
    return storages;
}
 
Example #18
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinTransactionInputFormatBlockVersion1ReqSeek() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "reqseekversion1.blk";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block version 1 requiring seek");
  RecordReader<BytesWritable, BitcoinTransaction> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  int transactCount = 0;
  while (reader.nextKeyValue()) {
    transactCount++;
  }
  assertEquals(2, transactCount, "Block version 1 requiring seek must contain exactly two transactions");
  reader.close();
}
 
Example #19
Source File: BitcoinFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readBitcoinRawBlockInputFormatGzipCompressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.gz";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998.039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
 
Example #20
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for block 1");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for block 1 contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 0, block.getEthereumTransactions().size(),"Block 1 must have 0 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in block 1");
	reader.close();
}
 
Example #21
Source File: TestCRAMInputFormatOnHDFS.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws Exception {
  Configuration conf = new Configuration();
  input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
  reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
  String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai")
      .toURI().toString();
  conf.set("mapred.input.dir", "file://" + input);

  URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa");
  URI hdfsRefIndex = clusterUri.resolve("/tmp/auxf.fa.fai");
  Files.copy(Paths.get(URI.create(reference)), Paths.get(hdfsRef));
  Files.copy(Paths.get(URI.create(referenceIndex)), Paths.get(hdfsRefIndex));

  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString());

  taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
 
Example #22
Source File: TestRecovery.java    From hadoop with Apache License 2.0
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));
 
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);
  
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
 
Example #23
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep 2 sec
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
 
Example #24
Source File: TestMRCJCFileOutputCommitter.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  File expectedFile = new File(new Path(outDir, partFile).toString());
  StringBuffer expectedOutput = new StringBuffer();
  expectedOutput.append(key1).append('\t').append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append('\t').append(val2).append("\n");
  String output = UtilsForTests.slurp(expectedFile);
  assertEquals(output, expectedOutput.toString());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #25
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext = new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
          ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, clean
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());

}
 
Example #26
Source File: InputSampler.java    From hadoop with Apache License 2.0
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job) 
    throws IOException, InterruptedException {
  List<InputSplit> splits = inf.getSplits(job);
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.size());
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
        job.getConfiguration(), new TaskAttemptID());
    RecordReader<K,V> reader = inf.createRecordReader(
        splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    while (reader.nextKeyValue()) {
      samples.add(ReflectionUtils.copy(job.getConfiguration(),
                                       reader.getCurrentKey(), null));
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
Example #27
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
@Test
public void testReinit() throws Exception {
  // Test that a split containing multiple files works correctly,
  // with the child RecordReader getting its initialize() method
  // called a second time.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path [] files = { new Path("file1"), new Path("file2") };
  long [] lengths = { 1, 1 };

  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // first initialize() call comes from MapTask. We'll do it here.
  rr.initialize(split, context);

  // First value is first filename.
  assertTrue(rr.nextKeyValue());
  assertEquals("file1", rr.getCurrentValue().toString());

  // The inner RR will return false, because it only emits one (k, v) pair.
  // But there's another sub-split to process. This returns true to us.
  assertTrue(rr.nextKeyValue());
  
  // And the 2nd rr will have its initialize method called correctly.
  assertEquals("file2", rr.getCurrentValue().toString());
  
  // But after both child RR's have returned their singleton (k, v), this
  // should also return false.
  assertFalse(rr.nextKeyValue());
}
 
Example #28
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "multisheetheader.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "us");
	conf.set("hadoopoffice.read.header.read", "true");
	conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
	conf.set("hadoopoffice.read.header.column.names.regex","column");
	conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
	conf.set("hadoopoffice.read.lowFootprint", "true");
	conf.set("hadoopoffice.read.lowFootprint.parser", "stax");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull(reader, "Format returned  null RecordReader");
	reader.initialize(splits.get(0), context);

	assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
			" header column 1 correctly read");
	assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
			" header column 2 correctly read");
	assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
			" header column 3 correctly read");
}
 
Example #29
Source File: TestLineRecordReader.java    From hadoop with Apache License 2.0
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
 
Example #30
Source File: TestVCFOutputFormat.java    From Hadoop-BAM with MIT License
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {
    test_vcf_output = File.createTempFile("test_vcf_output", "");
    test_vcf_output.delete();
    writable = new VariantContextWritable();
    Configuration conf = new Configuration();
    conf.set("hadoopbam.vcf.output-format", "VCF");
    KeyIgnoringVCFOutputFormat<Long> outputFormat = new KeyIgnoringVCFOutputFormat<Long>(conf);
    outputFormat.setHeader(readHeader());
    taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    writer = outputFormat.getRecordWriter(taskAttemptContext, new Path("file://" + test_vcf_output));
}