Java Code Examples for org.apache.hadoop.mapreduce.lib.input.FileSplit#getPath()

The following examples show how to use org.apache.hadoop.mapreduce.lib.input.FileSplit#getPath(). They are taken from open-source projects; the source file and originating project are listed above each example.
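Every example below follows the same basic pattern: cast the generic InputSplit to a FileSplit, call getPath() to obtain the Path of the file the split belongs to, then resolve the FileSystem from that path before opening a stream and seeking to the split's start offset. The following minimal sketch distills that pattern; the class name and fields are illustrative, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitPathSketch {
    private long start;
    private long end;
    private FSDataInputStream in;

    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
        // The new-API (mapreduce, not mapred) FileSplit is what record readers receive.
        FileSplit split = (FileSplit) genericSplit;
        Configuration conf = context.getConfiguration();

        Path file = split.getPath();              // the file this split covers
        start = split.getStart();                 // byte offset of the split within the file
        end = start + split.getLength();          // first byte past the split

        // Resolve the FileSystem from the path itself rather than FileSystem.get(conf),
        // so paths on non-default filesystems (e.g. s3a://) still work.
        FileSystem fs = file.getFileSystem(conf);
        in = fs.open(file);
        in.seek(start);                           // position at the start of the split
        // ... read records in [start, end) ...
    }
}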
Example 1
Source File: MapReduceParsedInputFormat.java    From incubator-retired-mrql with Apache License 2.0
public ParsedRecordReader ( FileSplit split,
                            TaskAttemptContext context,
                            Class<? extends Parser> parser_class,
                            Trees args ) throws IOException {
    Configuration conf = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(file);
    try {
        parser = parser_class.newInstance();
    } catch (Exception ex) {
        throw new Error("Unrecognized parser: "+parser_class);
    }
    parser.initialize(args);
    parser.open(fsin,start,end);
    result = null;
}
 
Example 2
Source File: GryoRecordReader.java    From tinkerpop with Apache License 2.0
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.gryoReader = GryoReader.build().mapper(
            GryoMapper.build().addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
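Two details are worth noting here: the reader rejects compressed input outright, since a (non-splittable) compressed stream cannot be seeked to an arbitrary split offset, and it shrinks splitLength by however many bytes seekToHeader() skipped past start, so the remaining length reflects what the reader will actually consume.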
 
Example 3
Source File: WikipediaRecordReader.java    From datawave with Apache License 2.0
private void initializeSuperClass(InputSplit split, TaskAttemptContext context) throws IOException {
    super.initialize(split, context);
    if (split instanceof FileSplit) {
        FileSplit fs = (FileSplit) split;
        Path p = fs.getPath();
        rawFileName = p.getName();
        
        if (log.isDebugEnabled()) {
            log.debug("FileSplit Info: ");
            log.debug("Start: " + fs.getStart());
            log.debug("Length: " + fs.getLength());
            log.debug("Locations: " + Arrays.toString(fs.getLocations()));
            log.debug("Path: " + fs.getPath());
        }
    } else {
        throw new IOException("Input Split unhandled.");
    }
}
 
Example 4
Source File: Bzip2TextInputFormat.java    From spork with Apache License 2.0
public BZip2LineRecordReader(Configuration job, FileSplit split)
throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    fileIn.seek(start);

    in = new CBZip2InputStream(fileIn, 9, end);
    if (start != 0) {
        // skip first line and re-establish "start".
        // LineRecordReader.readLine(this.in, null);
        readLine(this.in, null);
        start = in.getPos();
    }
    pos = in.getPos();
}
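The block guarded by start != 0 is the standard convention for line-oriented splits: a split that begins mid-file may start in the middle of a line, and that partial line belongs to the previous split's reader (which reads one line past its nominal end). Discarding the fragment and re-establishing start at the next line boundary keeps every line assigned to exactly one reader.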
 
Example 5
Source File: TestLocalRunner.java    From big-c with Apache License 2.0
protected void setup(Context context) {
  // Get the thread num from the file number.
  FileSplit split = (FileSplit) context.getInputSplit();
  Path filePath = split.getPath();
  String name = filePath.getName();
  this.threadId = Integer.valueOf(name);

  LOG.info("Thread " + threadId + " : "
      + context.getInputSplit());
}
 
Example 6
Source File: MapReduceGeneratorInputFormat.java    From incubator-retired-mrql with Apache License 2.0
public GeneratorRecordReader ( FileSplit split,
                               TaskAttemptContext context ) throws IOException {
    Configuration conf = context.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reader = new SequenceFile.Reader(fs,path,conf);
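    // the first record of the split encodes the generator's (offset, size) pair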
    MRContainer key = new MRContainer();
    MRContainer value = new MRContainer();
    reader.next(key,value);
    offset = ((MR_long)((Tuple)(value.data())).first()).get();
    size = ((MR_long)((Tuple)(value.data())).second()).get();
    index = -1;
}
 
Example 7
Source File: MultiLineInputFormat.java    From dkpro-c4corpus with Apache License 2.0
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException
{
    numberOfLinesToProcess = getNumLinesPerSplit(context);
    FileSplit split = (FileSplit) genericSplit;
    final Path file = split.getPath();
    Configuration conf = context.getConfiguration();
    this.maxLineLength = conf
            .getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
    FileSystem fs = file.getFileSystem(conf);
    start = split.getStart();
    end = start + split.getLength();
    boolean skipFirstLine = false;
    FSDataInputStream filein = fs.open(split.getPath());

    if (start != 0) {
        skipFirstLine = true;
        --start;
        filein.seek(start);
    }
    in = new LineReader(filein, conf);
    if (skipFirstLine) {
        start += in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
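Note the --start before the seek: by backing up one byte, a split that begins exactly after a newline makes readLine() consume only that newline (returning start to its original value), while a split that begins mid-line discards the fragment the previous reader already emitted. Either way, no line is lost or read twice.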
 
Example 8
Source File: UnenclosedBaseJsonRecordReader.java    From spatial-framework-for-hadoop with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    end = fileSplit.getLength() + start;
    Path filePath = fileSplit.getPath();
    commonInit(filePath, taskContext.getConfiguration());
}
 
Example 9
Source File: MainframeVBRecordReader.java    From Cobol-to-Hive with Apache License 2.0
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
		throws IOException {
	FileSplit split = (FileSplit) genericSplit;
	Configuration job = context.getConfiguration();
	final Path file = split.getPath();
	initialize(job, split.getStart(), split.getLength(), file);
}
 
Example 10
Source File: QseqInputFormat.java    From Hadoop-BAM with MIT License
public QseqRecordReader(Configuration conf, FileSplit split) throws IOException
{
	setConf(conf);
	file = split.getPath();
	start = split.getStart();
	end = start + split.getLength();

	FileSystem fs = file.getFileSystem(conf);
	FSDataInputStream fileIn = fs.open(file);

	CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
	CompressionCodec        codec        = codecFactory.getCodec(file);

	if (codec == null) // no codec.  Uncompressed file.
	{
		positionAtFirstRecord(fileIn);
		inputStream = fileIn;
	}
	else
	{ // compressed file
		if (start != 0)
			throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");

		inputStream = codec.createInputStream(fileIn);
		end = Long.MAX_VALUE; // read until the end of the file
	}

	lineReader = new LineReader(inputStream);
}
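For compressed files the reader insists on start == 0 and sets end to Long.MAX_VALUE: generic compression codecs are not splittable, so byte offsets in the compressed file do not correspond to offsets in the decompressed stream, and the whole file must be handled by a single reader.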
 
Example 11
Source File: ColumnarSplitDataReader.java    From kylin-on-parquet-v2 with Apache License 2.0
public void init(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    rowRecordReader = new RowRecordReader(cubeDesc, path, fs);
    metricsValuesBuffer = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);

    rowCount = new AtomicInteger(0);
}
 
Example 12
Source File: Excel2007FileRecordReader.java    From components with Apache License 2.0
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();

  final Path file = split.getPath();

  try {
    InputStream in = createInputStream(job, file);
    
    init4Excel2007(in, job, file);
  } catch (Exception e) {
    closeResource();
    throw e;
  }
}
 
Example 13
Source File: XmlInputFormat.java    From hiped2 with Apache License 2.0
public XmlRecordReader(FileSplit split, Configuration conf)
    throws IOException {
  startTag = conf.get(START_TAG_KEY).getBytes("UTF-8");
  endTag = conf.get(END_TAG_KEY).getBytes("UTF-8");

  // open the file and seek to the start of the split
  start = split.getStart();
  end = start + split.getLength();
  Path file = split.getPath();
  FileSystem fs = file.getFileSystem(conf);
  fsin = fs.open(file);
  fsin.seek(start);
}
 
Example 14
Source File: CsvBlurMapper.java    From incubator-retired-blur with Apache License 2.0
protected Path getCurrentFile(Context context) throws IOException {
  InputSplit split = context.getInputSplit();
  if (split instanceof FileSplit) {
    FileSplit inputSplit = (FileSplit) split;
    Path path = inputSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
    return path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
  }
  return null;
}
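makeQualified(URI, Path) fills in any missing scheme and authority from the filesystem's URI, so the returned path stays unambiguous if it is later serialized or compared outside the context of this FileSystem.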
 
Example 15
Source File: MDSSpreadReader.java    From multiple-dimension-spread with Apache License 2.0
@Override
public void initialize( final InputSplit inputSplit, final TaskAttemptContext context ) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit)inputSplit;
  Configuration config = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem( config );
  long fileLength = fs.getFileStatus( path ).getLen();
  long start = fileSplit.getStart();
  long length = fileSplit.getLength();
  InputStream in = fs.open( path );
  setStream( in , fileLength , start , length );
}
 
Example 16
Source File: MapperForNewData.java    From incubator-retired-blur with Apache License 2.0
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  InputSplit inputSplit = context.getInputSplit();
  FileSplit fileSplit = getFileSplit(inputSplit);
  Path path = fileSplit.getPath();
  FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  _timestamp = fileStatus.getModificationTime();
  _newRecords = context.getCounter("Blur", "New Records Read");
}
 
Example 17
Source File: IndexRRecordReader.java    From indexr with Apache License 2.0
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException, UnsupportedOperationException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Preconditions.checkState(split.getStart() == 0, "Segment should not be split");

    Path filePath = split.getPath();

    String sparkRequestedSchemaString = configuration.get(Config.SPARK_PROJECT_SCHEMA);
    this.sparkSchema = StructType$.MODULE$.fromString(sparkRequestedSchemaString);

    FileSystem fileSystem = filePath.getFileSystem(configuration);
    if (!SegmentHelper.checkSegmentByPath(filePath)) {
        logger.info("ignore: " + filePath);
        return;
    }

    ByteBufferReader.Opener opener = ByteBufferReader.Opener.create(fileSystem, filePath);
    IntegratedSegment.Fd fd = IntegratedSegment.Fd.create(filePath.toString(), opener);
    if (fd == null) {
        logger.warn("illegal segment: " + filePath);
        return;
    }
    segment = fd.open();
    if (segment == null) {
        logger.warn("illegal segment: " + filePath);
        return;
    }
    this.totalRowCount = segment.rowCount();
    this.packCount = segment.packCount();

    SegmentSchema segmentSchema = segment.schema();
    this.projectColumns = IndexRUtil.sparkSchemaToIndexRSchema(
            JavaConversions.seqAsJavaList(sparkSchema),
            a -> false)
            .getColumns().toArray(new ColumnSchema[0]);
    this.projectColIdsInSegment = new int[projectColumns.length];
    this.projectExists = new boolean[projectColumns.length];
    for (int i = 0; i < projectColumns.length; i++) {
        String columnName = projectColumns[i].getName();
        int colId = Trick.indexWhere(segmentSchema.getColumns(), cs -> cs.getName().equalsIgnoreCase(columnName));
        projectColIdsInSegment[i] = colId;
        projectExists[i] = colId >= 0;
    }
}
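The Preconditions.checkState at the top documents the format's contract: an IndexR segment is read as a whole, so a split with a nonzero start (i.e. a file that was divided across splits) is rejected immediately.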
 
Example 18
Source File: CopybookRecordReader.java    From CopybookInputFormat with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {

  String cblPath = context.getConfiguration().get(
      Const.COPYBOOK_INPUTFORMAT_CBL_HDFS_PATH_CONF);

  FileSystem fs = FileSystem.get(context.getConfiguration());

  BufferedInputStream inputStream = new BufferedInputStream(fs.open(new Path(
      cblPath)));

  CobolCopybookLoader copybookInt = new CobolCopybookLoader();
  try {
    externalRecord = copybookInt
        .loadCopyBook(inputStream, "RR", CopybookLoader.SPLIT_NONE, 0,
            "cp037", Convert.FMT_MAINFRAME, 0, null);

    int fileStructure = Constants.IO_FIXED_LENGTH;

    for (ExternalField field : externalRecord.getRecordFields()) {
      recordByteLength += field.getLen();
    }

    // jump to the point in the split that the first whole record of split
    // starts at
    FileSplit fileSplit = (FileSplit) split;

    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    final Path file = fileSplit.getPath();

    BufferedInputStream fileIn = new BufferedInputStream(fs.open(file));

    if (start != 0) {
      pos = start - (start % recordByteLength) + recordByteLength;

      fileIn.skip(pos);
    }

    ret = LineIOProvider.getInstance().getLineReader(
        fileStructure,
        LineIOProvider.getInstance().getLineProvider(fileStructure));

    ret.open(fileIn, externalRecord);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
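The alignment arithmetic start - (start % recordByteLength) + recordByteLength advances pos to a record boundary past start; for example, with recordByteLength = 100 and start = 250, pos becomes 250 - 50 + 100 = 300. (When start already falls exactly on a record boundary the expression still skips one full record, which is only correct if the previous split's reader reads through that record.)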
 
Example 19
Source File: LongLineEventRecordReader.java    From datawave with Apache License 2.0
/**
 * Initializes the line reader for the given split.
 *
 * @param genericSplit the file split to read
 * @param context the task attempt context supplying the job configuration
 * @throws IOException if the split's file cannot be opened
 */
public void initializeLineReader(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    
    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LfLineReader(codec.createInputStream(fileIn), job);
        in.setNewLineIncluded(newLineIncluded);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        
        // Hadoop CodecFactory only checks the file suffix, let's double check for gzip since some data producers
        // may not append .gz to their files.
        InputStream iStream = GzipDetectionUtil.decompressTream(fileIn);
        Class<?> streamClass = iStream.getClass();
        if (GZIPInputStream.class == streamClass) {
            end = Long.MAX_VALUE;
        }
        
        in = new LfLineReader(iStream, job);
        in.setNewLineIncluded(newLineIncluded);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
 
Example 20
Source File: ParquetInputSplit.java    From parquet-mr with Apache License 2.0
/**
 * Builds a {@code ParquetInputSplit} from a mapreduce {@link FileSplit}.
 *
 * @param split a mapreduce FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if there is an error while creating the Parquet split
 */
static ParquetInputSplit from(FileSplit split) throws IOException {
  return new ParquetInputSplit(split.getPath(),
      split.getStart(), split.getStart() + split.getLength(),
      split.getLength(), split.getLocations(), null);
}