org.apache.hadoop.mapreduce.RecordReader Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.RecordReader. Each example notes the source file, project, and license it was taken from.
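Before the project examples, here is a minimal sketch of what implementing the abstract RecordReader class typically looks like: a line-oriented reader that emits one (LongWritable, Text) pair per line of a FileSplit. The class name SimpleLineRecordReader is an illustrative assumption (it does not come from any of the projects below), and the sketch deliberately skips the split-boundary handling that a production reader such as Hadoop's LineRecordReader performs.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;

// Illustrative sketch only: a simplified line-oriented RecordReader.
public class SimpleLineRecordReader extends RecordReader<LongWritable, Text> {

  private LineReader in;
  private long start;
  private long end;
  private long pos;
  private final LongWritable key = new LongWritable();
  private final Text value = new Text();

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Open the file backing this split and seek to the split's start offset.
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    pos = start;
    Path path = fileSplit.getPath();
    FSDataInputStream fileIn = path.getFileSystem(conf).open(path);
    fileIn.seek(start);
    in = new LineReader(fileIn, conf);
  }

  @Override
  public boolean nextKeyValue() throws IOException {
    // Key is the byte offset of the line, value is the line itself.
    if (pos >= end) {
      return false;
    }
    key.set(pos);
    int bytesRead = in.readLine(value);
    if (bytesRead == 0) {
      return false; // end of stream
    }
    pos += bytesRead;
    return true;
  }

  @Override
  public LongWritable getCurrentKey() {
    return key;
  }

  @Override
  public Text getCurrentValue() {
    return value;
  }

  @Override
  public float getProgress() {
    return end == start ? 1.0f
        : Math.min(1.0f, (pos - start) / (float) (end - start));
  }

  @Override
  public void close() throws IOException {
    if (in != null) {
      in.close(); // also closes the underlying stream
    }
  }
}

A driver obtains such a reader from InputFormat.createRecordReader(...), calls initialize(...), and then iterates with nextKeyValue()/getCurrentKey()/getCurrentValue(), which is exactly the pattern the test examples below follow.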
Example #1
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1346406.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for block 1346406");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for block 1346406 contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 6, block.getEthereumTransactions().size(),"Block 1346406 must have 6 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in block 1346406");
	reader.close();
}
 
Example #2
Source File: JSONInputFormat.java    From kite with Apache License 2.0
@Override
public RecordReader<E, Void> createRecordReader(InputSplit split,
                                                TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(context);
  Path path;
  if (split instanceof FileSplit) {
    path = ((FileSplit) split).getPath();
  } else {
    throw new DatasetOperationException(
        "Split is not a FileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
  JSONFileReader<E> reader = new JSONFileReader<E>(
      path.getFileSystem(conf), path, accessor);
  reader.initialize();
  return reader.asRecordReader();
}
 
Example #3
Source File: SQLServerDBInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
/** {@inheritDoc} */
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();
  LOG.debug("Creating db record reader for db product: " + dbProductName);

  try {
    return new SQLServerDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName(),
        dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Example #4
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth1.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for block 1");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for block 1 contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 0, block.getEthereumTransactions().size(),"Block 1 must have 0 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in block 1");
	reader.close();
}
 
Example #5
Source File: Chain.java    From big-c with Apache License 2.0
/**
 * Add mapper(the first mapper) that reads input from the input
 * context and writes to queue
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);

  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  Mapper.Context mapperContext = createMapContext(rr, rw,
      (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
 
Example #6
Source File: ExcelFileInputFormat.java    From components with Apache License 2.0
@Override
public RecordReader<Void, IndexedRecord> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
  String encoding = context.getConfiguration().get(TALEND_ENCODING);
  String sheet = context.getConfiguration().get(TALEND_EXCEL_SHEET_NAME);
  long header = context.getConfiguration().getLong(TALEND_HEADER, 0L);
  long footer = context.getConfiguration().getLong(TALEND_FOOTER, 0L);
  String excelFormat = context.getConfiguration().get(TALEND_EXCEL_FORMAT, "EXCEL2007");
  long limit = context.getConfiguration().getLong(TALEND_EXCEL_LIMIT, -1);
  
  if("EXCEL2007".equals(excelFormat)) {
    return new Excel2007FileRecordReader(sheet, header, footer, limit);
  } else if("EXCEL97".equals(excelFormat)) {
    return new Excel97FileRecordReader(sheet, header, footer, limit);
  } else if("HTML".equals(excelFormat)) {
    return new ExcelHTMLFileRecordReader(encoding, header, footer, limit);
  }
  
  throw new IOException("not a valid excel format");
}
 
Example #7
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatGenesisBlock() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="ethgenesis.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for genesis block");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	assertTrue( reader.nextKeyValue(),"Input Split for genesis block contains at least one block");
	key=reader.getCurrentKey();
	block=reader.getCurrentValue();
	assertEquals( 0, block.getEthereumTransactions().size(),"Genesis Block must have 0 transactions");
	assertFalse( reader.nextKeyValue(),"No further blocks in genesis Block");
	reader.close();
}
 
Example #8
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
		throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "de");

	// low footprint
	conf.set("hadoopoffice.read.lowFootprint", "true");
	conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
	// for decryption simply set the password
	conf.set("hadoopoffice.read.security.crypt.password", "test2");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
	InterruptedException ex = assertThrows(InterruptedException.class,
			() -> reader.initialize(splits.get(0), context), "Exception is thrown in case of wrong password");
}
 
Example #9
Source File: HadoopCompat.java    From stratio-cassandra with Apache License 2.0
/**
 * Instantiates MapContext under Hadoop 1 and MapContextImpl under Hadoop 2.
 */
public static MapContext newMapContext(Configuration conf,
                                       TaskAttemptID taskAttemptID,
                                       RecordReader recordReader,
                                       RecordWriter recordWriter,
                                       OutputCommitter outputCommitter,
                                       StatusReporter statusReporter,
                                       InputSplit inputSplit) {
    return (MapContext) newInstance(MAP_CONTEXT_CONSTRUCTOR,
            conf, taskAttemptID, recordReader, recordWriter, outputCommitter,
            statusReporter, inputSplit);
}
 
Example #10
Source File: MultiMapperInputFormat.java    From Cubert with Apache License 2.0
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
                                             TaskAttemptContext context) throws IOException,
        InterruptedException
{
    MultiMapperSplit mmSplit = (MultiMapperSplit) split;
    int multiMapperIndex = mmSplit.getMultiMapperIndex();

    return getDelegate(context.getConfiguration(), multiMapperIndex).createRecordReader(mmSplit.getActualSplit(),
                                                                                        context);
}
 
Example #11
Source File: TestFixedLengthInputFormat.java    From hadoop with Apache License 2.0
/**
 * Test with no record length set.
 */
@Test (timeout=5000)
public void testNoRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Create the job and do not set fixed record length
  Job job = Job.getInstance(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context = MapReduceTestUtil.
          createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
          mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable,
          BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
          reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for not setting record length:", exceptionThrown);
}
 
Example #12
Source File: RowInputFormat.java    From gemfirexd-oss with Apache License 2.0
@Override
public RecordReader<Key, Row> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  RowRecordReader reader = new RowRecordReader();
  reader.initialize(split, context);
  return reader;
}
 
Example #13
Source File: ParquetLoader.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split)
    throws IOException {
  LOG.debug("LoadFunc.prepareToRead({}, {})", reader, split);
  this.reader = reader;
}
 
Example #14
Source File: TestCombineFileInputFormat.java    From hadoop with Apache License 2.0
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
  // Test that we properly initialize the child recordreader when
  // CombineFileInputFormat and CombineFileRecordReader are used.

  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf1 = new Configuration();
  conf1.set(DUMMY_KEY, "STATE1");
  TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path [] files = { new Path("file1") };
  long [] lengths = { 1 };

  CombineFileSplit split = new CombineFileSplit(files, lengths);

  RecordReader rr = inputFormat.createRecordReader(split, context1);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // Verify that the initial configuration is the one being used.
  // Right after construction the dummy key should have value "STATE1"
  assertEquals("Invalid initial dummy key value", "STATE1",
    rr.getCurrentKey().toString());

  // Switch the active context for the RecordReader...
  Configuration conf2 = new Configuration();
  conf2.set(DUMMY_KEY, "STATE2");
  TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
  rr.initialize(split, context2);

  // And verify that the new context is updated into the child record reader.
  assertEquals("Invalid secondary dummy key value", "STATE2",
    rr.getCurrentKey().toString());
}
 
Example #15
Source File: MneMapreducePersonDataTest.java    From mnemonic with Apache License 2.0
@Test(enabled = true, dependsOnMethods = { "testWritePersonData" })
public void testReadPersonData() throws Exception {
  long sumage = 0L;
  long reccnt = 0L;
  File folder = new File(m_workdir.toString());
  File[] listfiles = folder.listFiles();
  for (int idx = 0; idx < listfiles.length; ++idx) {
    if (listfiles[idx].isFile()
        && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
        && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
      System.out.println(String.format("Verifying : %s", listfiles[idx].getName()));
      FileSplit split = new FileSplit(
          new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]);
      InputFormat<NullWritable, MneDurableInputValue<Person<Long>>> inputFormat =
          new MneInputFormat<MneDurableInputValue<Person<Long>>, Person<Long>>();
      RecordReader<NullWritable, MneDurableInputValue<Person<Long>>> reader =
          inputFormat.createRecordReader(split, m_tacontext);
      MneDurableInputValue<Person<Long>> personval = null;
      while (reader.nextKeyValue()) {
        personval = reader.getCurrentValue();
        AssertJUnit.assertTrue(personval.getValue().getAge() < 51);
        sumage += personval.getValue().getAge();
        ++reccnt;
      }
      reader.close();
    }
  }
  AssertJUnit.assertEquals(m_reccnt, reccnt);
  AssertJUnit.assertEquals(m_sumage, sumage);
  System.out.println(String.format("The checksum of ages is %d", sumage));
}
 
Example #16
Source File: AllLoader.java    From spork with Apache License 2.0
@Override
public RecordReader<Writable, Writable> createRecordReader(
        InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    // this method plugs the AllReader into the system, and the
    // AllReader will when called select the correct LoadFunc
    // return new AllReader(udfSignature);
    return new AllReader(udfSignature);
}
 
Example #17
Source File: AvroInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context) throws IOException,
    InterruptedException {
  context.setStatus(split.toString());
  return new AvroRecordReader<T>();
}
 
Example #18
Source File: TestCombineFileInputFormat.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public RecordReader<Text,Text> createRecordReader(InputSplit split, 
    TaskAttemptContext context) throws IOException {
  return new CombineFileRecordReader((CombineFileSplit) split, context,
      (Class) DummyRecordReader.class);
}
 
Example #19
Source File: TestInputSampler.java    From hadoop with Apache License 2.0
@Override
public org.apache.hadoop.mapred.RecordReader<IntWritable, NullWritable>
    getRecordReader(final org.apache.hadoop.mapred.InputSplit split,
        JobConf job, Reporter reporter) throws IOException {
  return new org.apache.hadoop.mapred.RecordReader
      <IntWritable, NullWritable>() {
    private final IntWritable i =
        new IntWritable(((MapredSequentialSplit)split).getInit());
    private int maxVal = i.get() + maxDepth + 1;

    @Override
    public boolean next(IntWritable key, NullWritable value)
        throws IOException {
      i.set(i.get() + 1);
      return i.get() < maxVal;
    }
    @Override
    public IntWritable createKey() {
      return new IntWritable(i.get());
    }
    @Override
    public NullWritable createValue() {
      return NullWritable.get();
    }
    @Override
    public long getPos() throws IOException {
      return 0;
    }
    @Override
    public void close() throws IOException {
    }
    @Override
    public float getProgress() throws IOException {
      return 0;
    }
  };
}
 
Example #20
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public RecordReader<Text,Text> createRecordReader(InputSplit split, 
    TaskAttemptContext context) throws IOException {
  return new CombineFileRecordReader((CombineFileSplit) split, context,
      (Class) DummyRecordReader.class);
}
 
Example #21
Source File: CompositeInputFormat.java    From hadoop with Apache License 2.0
/**
 * Construct a CompositeRecordReader for the children of this InputFormat
 * as defined in the init expression.
 * The outermost join need only be composable, not necessarily a composite.
 * Mandating TupleWritable isn't strictly correct.
 */
@SuppressWarnings("unchecked") // child types unknown
public RecordReader<K,TupleWritable> createRecordReader(InputSplit split, 
    TaskAttemptContext taskContext) 
    throws IOException, InterruptedException {
  setFormat(taskContext.getConfiguration());
  return root.createRecordReader(split, taskContext);
}
 
Example #22
Source File: AvroMultipleInputsKeyInputFormat.java    From datafu with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException 
{    
  Schema readerSchema = AvroMultipleInputsUtil.getInputKeySchemaForSplit(context.getConfiguration(), split);
  if (readerSchema == null)
  {
    throw new RuntimeException("Could not determine input schema");
  }
  return new AvroKeyRecordReader<T>(readerSchema);
}
 
Example #23
Source File: JsonFileInputFormat.java    From jumbune with GNU Lesser General Public License v3.0
@Override
public RecordReader<LongWritable, Text>  createRecordReader(InputSplit split, TaskAttemptContext context) {
	RecordReader<LongWritable, Text> recordReader = null;
	try 
    {
		recordReader = new JsonFileRecordReader(split, context);
    }
    catch (IOException ioe) 
    {
    	LOGGER.error(ioe);
    }
	return recordReader; 
}
 
Example #24
Source File: EmoInputFormat.java    From emodb with Apache License 2.0
@Override
public RecordReader<Text, Row> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    FileSplit fileSplit  = (FileSplit) split;
    Path path = fileSplit.getPath();
    return new EmoRecordReader(BaseInputFormat.createRecordReader(context.getConfiguration(), path));
}
 
Example #25
Source File: FixedLengthInputFormat.java    From big-c with Apache License 2.0
@Override
public RecordReader<LongWritable, BytesWritable>
    createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  int recordLength = getRecordLength(context.getConfiguration());
  if (recordLength <= 0) {
    throw new IOException("Fixed record length " + recordLength
        + " is invalid.  It should be set to a value greater than zero");
  }
  return new FixedLengthRecordReader(recordLength);
}
 
Example #26
Source File: PhoenixHBaseLoader.java    From phoenix with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = reader;
    final String resourceSchemaAsStr = getValueFromUDFContext(this.contextSignature,RESOURCE_SCHEMA_SIGNATURE);
    if (resourceSchemaAsStr == null) {
        throw new IOException("Could not find schema in UDF context");
    }
   schema = (ResourceSchema)ObjectSerializer.deserialize(resourceSchemaAsStr); 
}
 
Example #27
Source File: PigAvroInputFormatAdaptor.java    From Cubert with Apache License 2.0
@Override
public RecordReader<NullWritable, Writable> createRecordReader(InputSplit split,
                                                               TaskAttemptContext context) throws IOException,
        InterruptedException
{
    return getDelegate(context.getConfiguration()).createRecordReader(split, context);
}
 
Example #28
Source File: FileSystemViewKeyInputFormat.java    From kite with Apache License 2.0
@Override
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  RecordReader<E, Void> unfilteredRecordReader = createUnfilteredRecordReader
      (inputSplit, taskAttemptContext);
  if (view != null) {
    // use the constraints to filter out entities from the reader
    return new FilteredRecordReader<E>(unfilteredRecordReader,
        ((AbstractRefinableView) view).getConstraints(), view.getAccessor());
  }
  return unfilteredRecordReader;
}
 
Example #29
Source File: CombineAvroKeyInputFormat.java    From incubator-pinot with Apache License 2.0
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Class cls = AvroKeyRecordReaderWrapper.class;

  return new CombineFileRecordReader<>((CombineFileSplit) split, context,
      (Class<? extends RecordReader<AvroKey<T>, NullWritable>>) cls);
}
 
Example #30
Source File: EthereumFormatHadoopTest.java    From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock0to10() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName="eth0to10.bin";
	String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
	Path file = new Path(fileNameBlock);
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();

	List<InputSplit> splits = format.getSplits(job);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	assertEquals( 1, splits.size(),"Only one split generated for block 0..10");
	RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull( reader,"Format returned  null RecordReader");
	reader.initialize(splits.get(0),context);
	BytesWritable key = new BytesWritable();
	EthereumBlock block = new EthereumBlock();
	int count=0;
	// guard against looping forever if the file unexpectedly contains fewer than 11 blocks
	while (count<11 && reader.nextKeyValue()) {
		count++;
	}
	assertEquals(11,count,"Block 0..10 contains 11 blocks");

	assertFalse( reader.nextKeyValue(),"No further blocks in block 0..10");
	reader.close();
}