Java Code Examples for org.apache.flink.core.fs.FileInputSplit#getStart()

The following examples show how to use org.apache.flink.core.fs.FileInputSplit#getStart(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Avro {@link DataFileReader} used to read the given split.
 *
 * <p>The datum reader is picked from {@code avroValueType}: a generic reader when the type is
 * {@code GenericRecord}, a specific reader when it is assignable to {@code SpecificRecordBase},
 * and a reflection-based reader otherwise. The reader is opened on the {@code stream} field.
 * As a side effect, {@code end} is set to {@code split.getStart() + split.getLength()} and
 * {@code recordsReadSinceLastSync} is reset to zero.
 *
 * @param split the file split to read
 * @return the reader opened over the split's file
 * @throws IOException propagated from the file-status lookup or from opening the reader
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	final DatumReader<E> datumReader;
	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}

	// parameterized logging needs no explicit level guard: the argument is already at hand
	LOG.info("Opening split {}", split);

	// the wrapper is given the length of the whole file, not the length of the split
	final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
	final SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);

	// parameterized cast instead of the raw type, so no unchecked conversion is involved
	final DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 2
Source File: OrcRowInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the byte range spanned by the stripes that begin inside the given split.
 *
 * <p>A stripe is selected when its offset lies in
 * {@code [split.getStart(), split.getStart() + split.getLength())}. The returned range runs from
 * the smallest selected stripe offset to the largest selected stripe end.
 *
 * @param split the file split whose start and length bound the selection
 * @param stripes the stripes to examine
 * @return a tuple of (offset, length) covering the selected stripes, or {@code (0, 0)} when no
 *         stripe starts inside the split
 */
private Tuple2<Long, Long> getOffsetAndLengthForSplit(FileInputSplit split, List<StripeInformation> stripes) {
	final long lowerBound = split.getStart();
	final long upperBound = lowerBound + split.getLength();

	long rangeStart = Long.MAX_VALUE;
	long rangeEnd = Long.MIN_VALUE;

	for (StripeInformation stripe : stripes) {
		final long stripeOffset = stripe.getOffset();
		if (stripeOffset < lowerBound || stripeOffset >= upperBound) {
			// stripe starts outside the split, so it is not included
			continue;
		}
		rangeStart = Math.min(rangeStart, stripeOffset);
		rangeEnd = Math.max(rangeEnd, stripeOffset + stripe.getLength());
	}

	if (rangeStart == Long.MAX_VALUE) {
		// no stripe was selected
		return Tuple2.of(0L, 0L);
	}
	return Tuple2.of(rangeStart, rangeEnd - rangeStart);
}
 
Example 3
Source File: OrcRowInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the byte range spanned by the stripes that begin inside the given split.
 *
 * <p>A stripe is selected when its offset lies in
 * {@code [split.getStart(), split.getStart() + split.getLength())}. The returned range runs from
 * the smallest selected stripe offset to the largest selected stripe end.
 *
 * @param split the file split whose start and length bound the selection
 * @param stripes the stripes to examine
 * @return a tuple of (offset, length) covering the selected stripes, or {@code (0, 0)} when no
 *         stripe starts inside the split
 */
private Tuple2<Long, Long> getOffsetAndLengthForSplit(FileInputSplit split, List<StripeInformation> stripes) {
	final long lowerBound = split.getStart();
	final long upperBound = lowerBound + split.getLength();

	long rangeStart = Long.MAX_VALUE;
	long rangeEnd = Long.MIN_VALUE;

	for (StripeInformation stripe : stripes) {
		final long stripeOffset = stripe.getOffset();
		if (stripeOffset < lowerBound || stripeOffset >= upperBound) {
			// stripe starts outside the split, so it is not included
			continue;
		}
		rangeStart = Math.min(rangeStart, stripeOffset);
		rangeEnd = Math.max(rangeEnd, stripeOffset + stripe.getLength());
	}

	if (rangeStart == Long.MAX_VALUE) {
		// no stripe was selected
		return Tuple2.of(0L, 0L);
	}
	return Tuple2.of(rangeStart, rangeEnd - rangeStart);
}
 
Example 4
Source File: AvroInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Avro {@link DataFileReader} used to read the given split.
 *
 * <p>The datum reader is picked from {@code avroValueType}: a generic reader when the type is
 * {@code GenericRecord}, a specific reader when it is assignable to {@code SpecificRecordBase},
 * and a reflection-based reader otherwise. The reader is opened on the {@code stream} field.
 * As a side effect, {@code end} is set to {@code split.getStart() + split.getLength()} and
 * {@code recordsReadSinceLastSync} is reset to zero.
 *
 * @param split the file split to read
 * @return the reader opened over the split's file
 * @throws IOException propagated from the file-status lookup or from opening the reader
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	final DatumReader<E> datumReader;
	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}

	// parameterized logging needs no explicit level guard: the argument is already at hand
	LOG.info("Opening split {}", split);

	// the wrapper is given the length of the whole file, not the length of the split
	final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
	final SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);

	// parameterized cast instead of the raw type, so no unchecked conversion is involved
	final DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 5
Source File: AvroInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Avro {@link DataFileReader} used to read the given split.
 *
 * <p>The datum reader is picked from {@code avroValueType}: a generic reader when the type is
 * {@code GenericRecord}, a specific reader when it is assignable to {@code SpecificRecordBase},
 * and a reflection-based reader otherwise. The reader is opened on the {@code stream} field.
 * As a side effect, {@code end} is set to {@code split.getStart() + split.getLength()} and
 * {@code recordsReadSinceLastSync} is reset to zero.
 *
 * @param split the file split to read
 * @return the reader opened over the split's file
 * @throws IOException propagated from the file-status lookup or from opening the reader
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	final DatumReader<E> datumReader;
	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}

	// parameterized logging needs no explicit level guard: the argument is already at hand
	LOG.info("Opening split {}", split);

	// the wrapper is given the length of the whole file, not the length of the split
	final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
	final SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);

	// parameterized cast instead of the raw type, so no unchecked conversion is involved
	final DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 6
Source File: ContinuousFileProcessingRescalingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TimestampedFileInputSplit} that copies the split number, path, start, length
 * and host names of the given split and attaches the given modification time.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; must not be null
 * @return the timestamped copy of {@code split}
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
	final FileInputSplit source = Preconditions.checkNotNull(split);
	final long start = source.getStart();
	final long length = source.getLength();

	return new TimestampedFileInputSplit(
		modTime, source.getSplitNumber(), source.getPath(), start, length, source.getHostnames());
}
 
Example 7
Source File: DelimitedInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Re-opens the given split and positions the format at a previously recorded offset.
 *
 * <p>The split is opened through {@link #open(FileInputSplit)}; afterwards {@code offset} is set
 * to {@code state}, even if opening failed. Depending on where {@code state} lies relative to
 * the split, the format is either marked as ended, left as {@code open} positioned it, or
 * re-buffered and moved to the recorded offset.
 *
 * @param split the split to re-open; must not be null
 * @param state the offset to resume from; must not be null and must be {@code -1} or at least
 *        {@code split.getStart()}
 * @throws IOException propagated from opening the split or seeking the stream
 */
@PublicEvolving
@Override
public void reopen(FileInputSplit split, Long state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	final boolean offsetIsLegal = state == -1 || state >= split.getStart();
	Preconditions.checkArgument(offsetIsLegal,
		" Illegal offset "+ state +", smaller than the splits start=" + split.getStart());

	try {
		this.open(split);
	} finally {
		// the recorded offset is restored whether or not open() succeeded
		this.offset = state;
	}

	final long resumeOffset = state;
	final long declaredLength = split.getLength();

	// NOTE(review): assuming open() leaves this.splitStart equal to split.getStart(), every
	// state greater than split.getStart() also exceeds splitStart + (-1), so for a split of
	// length -1 this check fires first and the -1 branch further down looks unreachable.
	// Confirm the intended behavior.
	if (resumeOffset > this.splitStart + declaredLength) {
		this.end = true;
		return;
	}

	if (resumeOffset <= split.getStart()) {
		// nothing to restore beyond what open() already did
		return;
	}

	initBuffers();
	this.stream.seek(this.offset);

	if (declaredLength == -1) {
		// this is the case for unsplittable files
		fillBuffer(0);
		return;
	}

	// remaining length of the split, counted from the restored offset
	this.splitLength = this.splitStart + declaredLength - this.offset;
	if (this.splitLength <= 0) {
		this.end = true;
	}
}
 
Example 8
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the file behind the given split and moves the stream to the split's start offset.
 *
 * <p>The split, its start and its length are recorded in {@code currentSplit},
 * {@code splitStart} and {@code splitLength}. The file itself is opened on a separate
 * {@code InputSplitOpenThread}, so that interruptions of the thread working on the input format
 * do not reach the file system; this method waits for that thread's result and passes the
 * stream through {@code decorateInputStream}.
 *
 * @param fileSplit the split to open
 * @throws IOException if opening or decorating the stream fails (the cause is attached), or
 *         propagated from the final seek
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {
	this.currentSplit = fileSplit;
	this.splitStart = fileSplit.getStart();
	this.splitLength = fileSplit.getLength();

	if (LOG.isDebugEnabled()) {
		LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]");
	}

	// hand the open call to its own thread and wait for the outcome
	final InputSplitOpenThread opener = new InputSplitOpenThread(fileSplit, this.openTimeout);
	opener.start();

	try {
		// assigned in two steps so the raw stream stays referenced if decoration fails
		this.stream = opener.waitForCompletion();
		this.stream = decorateInputStream(this.stream, fileSplit);
	} catch (Throwable t) {
		throw new IOException("Error opening the Input Split " + fileSplit.getPath() +
				" [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);
	}

	if (this.splitStart == 0) {
		// no seek is issued for a split that starts at offset 0
		return;
	}

	// position the stream at the first byte of the split
	this.stream.seek(this.splitStart);
}
 
Example 9
Source File: OrcRowInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code OrcRowSplitReader} for the given split and stores it in {@code reader}.
 *
 * <p>The reader is configured from this format's {@code conf}, {@code schema},
 * {@code selectedFields}, {@code conjunctPredicates} and {@code batchSize}, plus the split's
 * path, start and length.
 *
 * @param fileSplit the split to read
 * @throws IOException propagated from creating the reader
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {
	LOG.debug("Opening ORC file {}", fileSplit.getPath());

	final long start = fileSplit.getStart();
	final long length = fileSplit.getLength();

	this.reader = new OrcRowSplitReader(
			conf, schema, selectedFields, conjunctPredicates, batchSize,
			fileSplit.getPath(), start, length);
}
 
Example 10
Source File: HiveTableFileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the given split into a {@code FileSplit} with the same path and start offset.
 *
 * <p>A length of {@code -1} is replaced by the file's length minus the split start, because the
 * resulting {@code FileSplit} should not carry a {@code -1} length. The host array is passed as
 * {@code null}.
 *
 * @param fileSplit the split to convert
 * @return the converted split
 * @throws IOException propagated from the file-status lookup
 */
@VisibleForTesting
static FileSplit toHadoopFileSplit(FileInputSplit fileSplit) throws IOException {
	final URI uri = fileSplit.getPath().toUri();
	final long start = fileSplit.getStart();
	final long declaredLength = fileSplit.getLength();

	final long length;
	if (declaredLength != -1) {
		length = declaredLength;
	} else {
		// Hadoop FileSplit should not have -1 length: use what is left of the file instead.
		final long fileLength =
				fileSplit.getPath().getFileSystem().getFileStatus(fileSplit.getPath()).getLen();
		length = fileLength - start;
	}

	return new FileSplit(new Path(uri), start, length, (String[]) null);
}
 
Example 11
Source File: FlinkSequenceInputFormat.java    From incubator-retired-mrql with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a {@code SequenceFile.Reader} on the split's file and records the range to read.
 *
 * <p>{@code Plan.conf} is created on first use. {@code end} is set to the split's start plus its
 * length; if the split starts beyond the reader's current position, the reader is synced to the
 * split start. {@code start} then takes the reader's position and {@code more} records whether
 * that position lies before {@code end}.
 *
 * @param split the split to open
 * @throws IOException propagated from the file system or the reader
 */
@Override
public void open ( FileInputSplit split ) throws IOException {
    final Path path = new Path(split.getPath().toString());
    if (Plan.conf == null) {
        Plan.conf = new Configuration();
    }
    final FileSystem fs = path.getFileSystem(Plan.conf);
    in = new SequenceFile.Reader(fs, path, Plan.conf);

    final long splitStart = split.getStart();
    end = splitStart + split.getLength();
    if (splitStart > in.getPosition()) {
        // sync to start
        in.sync(splitStart);
    }
    start = in.getPosition();
    more = start < end;
}
 
Example 12
Source File: ContinuousFileProcessingRescalingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TimestampedFileInputSplit} that copies the split number, path, start, length
 * and host names of the given split and attaches the given modification time.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; must not be null
 * @return the timestamped copy of {@code split}
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
	final FileInputSplit source = Preconditions.checkNotNull(split);
	final long start = source.getStart();
	final long length = source.getLength();

	return new TimestampedFileInputSplit(
		modTime, source.getSplitNumber(), source.getPath(), start, length, source.getHostnames());
}
 
Example 13
Source File: DelimitedInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Re-opens the given split and positions the format at a previously recorded offset.
 *
 * <p>The split is opened through {@link #open(FileInputSplit)}; afterwards {@code offset} is set
 * to {@code state}, even if opening failed. Depending on where {@code state} lies relative to
 * the split, the format is either marked as ended, left as {@code open} positioned it, or
 * re-buffered and moved to the recorded offset.
 *
 * @param split the split to re-open; must not be null
 * @param state the offset to resume from; must not be null and must be {@code -1} or at least
 *        {@code split.getStart()}
 * @throws IOException propagated from opening the split or seeking the stream
 */
@PublicEvolving
@Override
public void reopen(FileInputSplit split, Long state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	final boolean offsetIsLegal = state == -1 || state >= split.getStart();
	Preconditions.checkArgument(offsetIsLegal,
		" Illegal offset "+ state +", smaller than the splits start=" + split.getStart());

	try {
		this.open(split);
	} finally {
		// the recorded offset is restored whether or not open() succeeded
		this.offset = state;
	}

	final long resumeOffset = state;
	final long declaredLength = split.getLength();

	// NOTE(review): assuming open() leaves this.splitStart equal to split.getStart(), every
	// state greater than split.getStart() also exceeds splitStart + (-1), so for a split of
	// length -1 this check fires first and the -1 branch further down looks unreachable.
	// Confirm the intended behavior.
	if (resumeOffset > this.splitStart + declaredLength) {
		this.end = true;
		return;
	}

	if (resumeOffset <= split.getStart()) {
		// nothing to restore beyond what open() already did
		return;
	}

	initBuffers();
	this.stream.seek(this.offset);

	if (declaredLength == -1) {
		// this is the case for unsplittable files
		fillBuffer(0);
		return;
	}

	// remaining length of the split, counted from the restored offset
	this.splitLength = this.splitStart + declaredLength - this.offset;
	if (this.splitLength <= 0) {
		this.end = true;
	}
}
 
Example 14
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the file behind the given split and moves the stream to the split's start offset.
 *
 * <p>The split, its start and its length are recorded in {@code currentSplit},
 * {@code splitStart} and {@code splitLength}. The file itself is opened on a separate
 * {@code InputSplitOpenThread}, so that interruptions of the thread working on the input format
 * do not reach the file system; this method waits for that thread's result and passes the
 * stream through {@code decorateInputStream}.
 *
 * @param fileSplit the split to open
 * @throws IOException if opening or decorating the stream fails (the cause is attached), or
 *         propagated from the final seek
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {
	this.currentSplit = fileSplit;
	this.splitStart = fileSplit.getStart();
	this.splitLength = fileSplit.getLength();

	if (LOG.isDebugEnabled()) {
		LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]");
	}

	// hand the open call to its own thread and wait for the outcome
	final InputSplitOpenThread opener = new InputSplitOpenThread(fileSplit, this.openTimeout);
	opener.start();

	try {
		// assigned in two steps so the raw stream stays referenced if decoration fails
		this.stream = opener.waitForCompletion();
		this.stream = decorateInputStream(this.stream, fileSplit);
	} catch (Throwable t) {
		throw new IOException("Error opening the Input Split " + fileSplit.getPath() +
				" [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);
	}

	if (this.splitStart == 0) {
		// no seek is issued for a split that starts at offset 0
		return;
	}

	// position the stream at the first byte of the split
	this.stream.seek(this.splitStart);
}
 
Example 15
Source File: ContinuousFileProcessingRescalingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TimestampedFileInputSplit} that copies the split number, path, start, length
 * and host names of the given split and attaches the given modification time.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; must not be null
 * @return the timestamped copy of {@code split}
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
	final FileInputSplit source = Preconditions.checkNotNull(split);
	final long start = source.getStart();
	final long length = source.getLength();

	return new TimestampedFileInputSplit(
		modTime, source.getSplitNumber(), source.getPath(), start, length, source.getHostnames());
}
 
Example 16
Source File: DelimitedInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Re-opens the given split and positions the format at a previously recorded offset.
 *
 * <p>The split is opened through {@link #open(FileInputSplit)}; afterwards {@code offset} is set
 * to {@code state}, even if opening failed. Depending on where {@code state} lies relative to
 * the split, the format is either marked as ended, left as {@code open} positioned it, or
 * re-buffered and moved to the recorded offset.
 *
 * @param split the split to re-open; must not be null
 * @param state the offset to resume from; must not be null and must be {@code -1} or at least
 *        {@code split.getStart()}
 * @throws IOException propagated from opening the split or seeking the stream
 */
@PublicEvolving
@Override
public void reopen(FileInputSplit split, Long state) throws IOException {
	Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
	Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

	final boolean offsetIsLegal = state == -1 || state >= split.getStart();
	Preconditions.checkArgument(offsetIsLegal,
		" Illegal offset "+ state +", smaller than the splits start=" + split.getStart());

	try {
		this.open(split);
	} finally {
		// the recorded offset is restored whether or not open() succeeded
		this.offset = state;
	}

	final long resumeOffset = state;
	final long declaredLength = split.getLength();

	// NOTE(review): assuming open() leaves this.splitStart equal to split.getStart(), every
	// state greater than split.getStart() also exceeds splitStart + (-1), so for a split of
	// length -1 this check fires first and the -1 branch further down looks unreachable.
	// Confirm the intended behavior.
	if (resumeOffset > this.splitStart + declaredLength) {
		this.end = true;
		return;
	}

	if (resumeOffset <= split.getStart()) {
		// nothing to restore beyond what open() already did
		return;
	}

	initBuffers();
	this.stream.seek(this.offset);

	if (declaredLength == -1) {
		// this is the case for unsplittable files
		fillBuffer(0);
		return;
	}

	// remaining length of the split, counted from the restored offset
	this.splitLength = this.splitStart + declaredLength - this.offset;
	if (this.splitLength <= 0) {
		this.end = true;
	}
}
 
Example 17
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the file behind the given split and moves the stream to the split's start offset.
 *
 * <p>The split, its start and its length are recorded in {@code currentSplit},
 * {@code splitStart} and {@code splitLength}. The file itself is opened on a separate
 * {@code InputSplitOpenThread}, so that interruptions of the thread working on the input format
 * do not reach the file system; this method waits for that thread's result and passes the
 * stream through {@code decorateInputStream}.
 *
 * @param fileSplit the split to open
 * @throws IOException if opening or decorating the stream fails (the cause is attached), or
 *         propagated from the final seek
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {
	this.currentSplit = fileSplit;
	this.splitStart = fileSplit.getStart();
	this.splitLength = fileSplit.getLength();

	if (LOG.isDebugEnabled()) {
		LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]");
	}

	// hand the open call to its own thread and wait for the outcome
	final InputSplitOpenThread opener = new InputSplitOpenThread(fileSplit, this.openTimeout);
	opener.start();

	try {
		// assigned in two steps so the raw stream stays referenced if decoration fails
		this.stream = opener.waitForCompletion();
		this.stream = decorateInputStream(this.stream, fileSplit);
	} catch (Throwable t) {
		throw new IOException("Error opening the Input Split " + fileSplit.getPath() +
				" [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);
	}

	if (this.splitStart == 0) {
		// no seek is issued for a split that starts at offset 0
		return;
	}

	// position the stream at the first byte of the split
	this.stream.seek(this.splitStart);
}