org.apache.hadoop.mapreduce.lib.input.FileSplit Scala Examples

The following examples show how to use org.apache.hadoop.mapreduce.lib.input.FileSplit in Scala. Each example is taken from an open-source project; the source file, project name, and license are listed above it.
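For orientation, here is a minimal sketch (with a hypothetical path) that constructs a FileSplit directly and inspects it; the examples below build such splits and hand them to record readers.

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.FileSplit

object FileSplitSketch {
  def main(args: Array[String]): Unit = {
    // A split covering the first 1 MB of a (hypothetical) file, with no locality hints.
    val split = new FileSplit(new Path("hdfs:///data/sample.txt"), 0L, 1024L * 1024L, Array.empty[String])
    println(s"path=${split.getPath} start=${split.getStart} length=${split.getLength}")
  }
}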
Example 1
Source File: HadoopFileLinesReader.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.Closeable
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.{FileSplit, LineRecordReader}
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl


class HadoopFileLinesReader(
    file: PartitionedFile, conf: Configuration) extends Iterator[Text] with Closeable {
  private val iterator = {
    val fileSplit = new FileSplit(
      new Path(new URI(file.filePath)),
      file.start,
      file.length,
      // TODO: Implement Locality
      Array.empty)
    val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
    val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
    val reader = new LineRecordReader()
    reader.initialize(fileSplit, hadoopAttemptContext)
    new RecordReaderIterator(reader)
  }

  override def hasNext: Boolean = iterator.hasNext

  override def next(): Text = iterator.next()

  override def close(): Unit = iterator.close()
} 
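A minimal usage sketch for the reader above, assuming Spark 2.x's PartitionedFile signature and a hypothetical input path:

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.{HadoopFileLinesReader, PartitionedFile}

object LinesReaderSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    // Hypothetical file and byte range; partitionValues is empty because no partition columns are involved.
    val file = PartitionedFile(InternalRow.empty, "hdfs:///data/sample.txt", 0L, 1024L * 1024L)
    val reader = new HadoopFileLinesReader(file, conf)
    try reader.foreach(line => println(line.toString))
    finally reader.close()
  }
}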
Example 2
Source File: TFRecordInputFormat.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.utils.tf

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
import org.apache.hadoop.fs.FSDataInputStream

class TFRecordInputFormat extends FileInputFormat[BytesWritable, NullWritable] {
  override def createRecordReader(inputSplit: InputSplit, context: TaskAttemptContext):
  RecordReader[BytesWritable, NullWritable] = new RecordReader[BytesWritable, NullWritable] {

    private var inputStream: FSDataInputStream = null
    private var reader: TFRecordIterator = null
    private var length: Long = 0L
    private var begin: Long = 0L
    private var current: Array[Byte] = null


    override def getCurrentKey: BytesWritable = {
      new BytesWritable(current)
    }

    override def getProgress: Float = {
      (inputStream.getPos - begin) / (length + 1e-6f)
    }

    override def nextKeyValue(): Boolean = {
      if (reader.hasNext) {
        current = reader.next()
        true
      } else {
        false
      }
    }

    override def getCurrentValue: NullWritable = {
      NullWritable.get()
    }

    override def initialize(split: InputSplit, context: TaskAttemptContext): Unit = {
      val conf = context.getConfiguration
      val fileSplit = split.asInstanceOf[FileSplit]
      length = fileSplit.getLength
      begin = fileSplit.getStart

      val file = fileSplit.getPath
      val fs = file.getFileSystem(conf)
      inputStream = fs.open(file, 4096)
      reader = new TFRecordIterator(inputStream)
    }

    override def close(): Unit = {
      inputStream.close()
    }
  }

  override protected def isSplitable(context: JobContext, filename: Path): Boolean = false
} 
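A sketch of how this input format might be driven from Spark, using a hypothetical TFRecord path; newAPIHadoopFile yields one (BytesWritable, NullWritable) pair per record:

import com.intel.analytics.bigdl.utils.tf.TFRecordInputFormat
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.spark.{SparkConf, SparkContext}

object TFRecordReadSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("tfrecord-read").setMaster("local[*]"))
    val records = sc.newAPIHadoopFile(
      "hdfs:///data/train.tfrecord",    // hypothetical path
      classOf[TFRecordInputFormat],
      classOf[BytesWritable],
      classOf[NullWritable])
    // Sum of raw record payload sizes, as a cheap sanity check.
    val totalBytes = records.map { case (bytes, _) => bytes.getLength.toLong }.sum()
    println(s"total TFRecord payload bytes: $totalBytes")
    sc.stop()
  }
}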
Example 3
Source File: FileLocalityInputFormat.scala    From ArchiveSpark   with MIT License
package org.archive.archivespark.sparkling.util

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}

class FileLocalityInputFormat extends FileInputFormat[NullWritable, Text] {
  class FileLocalityRecordReader extends RecordReader[NullWritable, Text] {
    private var filePath: Text = new Text()
    private var read: Boolean = true

    override def initialize(split: InputSplit, context: TaskAttemptContext): Unit = {
      filePath.set(split.asInstanceOf[FileSplit].getPath.toString)
      read = false
    }

    override def nextKeyValue(): Boolean = {
      if (read) false
      else {
        read = true
        true
      }
    }

    override def getCurrentKey: NullWritable = NullWritable.get
    override def getCurrentValue: Text = filePath
    override def getProgress: Float = if (read) 1.0f else 0.0f
    override def close(): Unit = read = true
  }

  override def isSplitable(context: JobContext, filename: Path): Boolean = false
  override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[NullWritable, Text] = new FileLocalityRecordReader
} 
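A usage sketch with a hypothetical glob path: because each file becomes exactly one unsplittable record whose value is its own path, this is a convenient way to get an RDD of file names while keeping Hadoop's locality information:

import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.spark.{SparkConf, SparkContext}
import org.archive.archivespark.sparkling.util.FileLocalityInputFormat

object FileLocalitySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("file-locality").setMaster("local[*]"))
    val paths = sc.newAPIHadoopFile(
      "hdfs:///data/warcs/*.warc.gz",   // hypothetical glob
      classOf[FileLocalityInputFormat],
      classOf[NullWritable],
      classOf[Text])
      .map { case (_, path) => path.toString }
    paths.collect().foreach(println)
    sc.stop()
  }
}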
Example 4
Source File: InputFormatConf.scala    From flint   with Apache License 2.0
package com.twosigma.flint.hadoop

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{ FileSystem, Path }
import org.apache.hadoop.io.{ LongWritable, Text, Writable }
import org.apache.hadoop.mapreduce.{ InputFormat, InputSplit, Job, RecordReader }
import org.apache.hadoop.mapreduce.lib.input.{ FileInputFormat, FileSplit, TextInputFormat }

import scala.collection.immutable

trait InputFormatConf[K, V] extends Serializable {
  type IF <: InputFormat[K, V]
  type Split <: InputSplit with Writable

  type KExtract <: Extract[K]
  type VExtract <: Extract[V]

  def kExtract: KExtract
  def vExtract: VExtract

  def makeInputFormat(): IF

  // TODO: decide whether makeSplits should wrap the splits in WriSer on behalf of the caller
  def makeSplits(hadoopConf: Configuration): IndexedSeq[WriSer[Split]]

  // TODO do we want to require typing of the RecordReader as well?
  final def createRecordReader(hadoopConf: Configuration, split: Split,
    inputFormat: IF = makeInputFormat()): RecordReader[K, V] = {
    val tac = ConfOnlyTAC(hadoopConf)
    val recordReader = inputFormat.createRecordReader(split, tac)
    recordReader.initialize(split, tac)
    recordReader
  }
}

case class TextInputFormatConf(file: String, partitions: Int)
  extends InputFormatConf[LongWritable, Text] {
  type IF = TextInputFormat
  type Split = FileSplit

  // TODO now that we figured out what's up, see if we can't eliminate the need for this...
  val internalK = Extract.unit[LongWritable]
  val internalV = Extract.text

  type KExtract = internalK.type
  type VExtract = internalV.type

  override val kExtract: KExtract = internalK
  override val vExtract: VExtract = internalV

  def makeInputFormat() = new TextInputFormat()
  def makeSplits(hadoopConf: Configuration): immutable.IndexedSeq[WriSer[FileSplit]] = {
    val job = Job.getInstance(hadoopConf)
    FileInputFormat.setInputPaths(job, file)
    val path = new Path(file)
    val len = FileSystem.get(hadoopConf).listStatus(path).head.getLen
    val size_per = math.round(len / partitions.toDouble)

    ((0 until partitions - 1).map { p =>
      new FileSplit(path, size_per * p, size_per, null)
    } :+ {
      val fin = size_per * (partitions - 1)
      new FileSplit(path, fin, len - fin, null)
    }).map(WriSer(_))
  }
}

// TODO do we really get much from having this as its own class? consider just making a def csv method in TextInputFormatConf
object CSVInputFormatConf {
  def apply[V](ifc: InputFormatConf[LongWritable, V] { type Split = FileSplit }): InputFormatConf[LongWritable, V] {
    type IF = ifc.IF
    type Split = ifc.Split
    type KExtract = ifc.KExtract
    type VExtract = ifc.VExtract
  } = new InputFormatConf[LongWritable, V] {
    type IF = ifc.IF
    type Split = ifc.Split
    type KExtract = ifc.KExtract
    type VExtract = ifc.VExtract

    override val kExtract: KExtract = ifc.kExtract
    override val vExtract: VExtract = ifc.vExtract

    override def makeInputFormat() = ifc.makeInputFormat()
    override def makeSplits(hadoopConf: Configuration) = {
      val splits = ifc.makeSplits(hadoopConf)
      splits.headOption.fold(IndexedSeq.empty[WriSer[Split]]) {
        case WriSer(head) =>
          val rr = createRecordReader(hadoopConf, head)
          require(rr.nextKeyValue, "csv has no header, first line was empty")
          val afterHeader = rr.getCurrentKey.get
          require(rr.nextKeyValue, "first split is empty")
          WriSer(new FileSplit(head.getPath, afterHeader, head.getLength - afterHeader, null)) +:
            splits.tail
      }
    }
  }
} 
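A sketch of how these configuration classes might be driven, assuming the project's WriSer, Extract and ConfOnlyTAC helpers live in the same com.twosigma.flint.hadoop package, and using a hypothetical CSV path:

import org.apache.hadoop.conf.Configuration
import com.twosigma.flint.hadoop.{CSVInputFormatConf, TextInputFormatConf, WriSer} // assumed package

object FlintSplitSketch {
  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()
    val textConf = TextInputFormatConf("hdfs:///data/prices.csv", partitions = 4)
    // CSVInputFormatConf shifts the first split past the header line.
    val csvConf = CSVInputFormatConf(textConf)
    csvConf.makeSplits(hadoopConf).foreach { case WriSer(split) =>
      println(s"${split.getPath}: start=${split.getStart}, length=${split.getLength}")
    }
  }
}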
Example 5
Source File: HadoopFileLinesReader.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.Closeable
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.{FileSplit, LineRecordReader}
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl


class HadoopFileLinesReader(
    file: PartitionedFile,
    lineSeparator: Option[Array[Byte]],
    conf: Configuration) extends Iterator[Text] with Closeable {

  def this(file: PartitionedFile, conf: Configuration) = this(file, None, conf)

  private val iterator = {
    val fileSplit = new FileSplit(
      new Path(new URI(file.filePath)),
      file.start,
      file.length,
      // TODO: Implement Locality
      Array.empty)
    val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
    val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId)

    val reader = lineSeparator match {
      case Some(sep) => new LineRecordReader(sep)
      // If the line separator is `None`, it covers `\r`, `\r\n` and `\n`.
      case _ => new LineRecordReader()
    }

    reader.initialize(fileSplit, hadoopAttemptContext)
    new RecordReaderIterator(reader)
  }

  override def hasNext: Boolean = iterator.hasNext

  override def next(): Text = iterator.next()

  override def close(): Unit = iterator.close()
} 
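The extra constructor parameter lets callers override the record delimiter; a sketch reading a hypothetical Ctrl-A-delimited file, again assuming Spark's PartitionedFile signature:

import java.nio.charset.StandardCharsets

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.{HadoopFileLinesReader, PartitionedFile}

object CustomSeparatorSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    val file = PartitionedFile(InternalRow.empty, "hdfs:///data/records.ctrla", 0L, 2048L) // hypothetical
    // Some(sep) makes LineRecordReader split records on \u0001 instead of \n / \r\n.
    val reader = new HadoopFileLinesReader(file, Some("\u0001".getBytes(StandardCharsets.UTF_8)), conf)
    try reader.foreach(line => println(line.toString))
    finally reader.close()
  }
}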
Example 6
Source File: HadoopFileExcelReader.scala    From spark-hadoopoffice-ds   with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.io.Closeable
import java.net.URI

import org.apache.spark.sql.execution.datasources._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.{ FileSplit, LineRecordReader }
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl

import org.apache.spark.sql.execution.datasources.RecordReaderIterator

import org.zuinnote.hadoop.office.format.mapreduce.ExcelFileInputFormat
import org.zuinnote.hadoop.office.format.mapreduce.ExcelRecordReader

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log


class HadoopFileExcelReader(
  file: PartitionedFile, conf: Configuration) extends Iterator[ArrayWritable] with Closeable {
  val LOG = LogFactory.getLog(classOf[HadoopFileExcelReader])
  private var reader: RecordReader[Text, ArrayWritable] = null
  private val iterator = {
    val fileSplit = new FileSplit(
      new Path(new URI(file.filePath)), file.start, file.length, Array.empty) // todo: implement locality (replace Array.empty with the locations)
    val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
    val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
    val inputFormat = new ExcelFileInputFormat()
    reader = inputFormat.createRecordReader(fileSplit, hadoopAttemptContext)
    reader.initialize(fileSplit, hadoopAttemptContext)
    new RecordReaderIterator(reader)
  }
  
  def getReader: RecordReader[Text, ArrayWritable] = reader

  override def hasNext: Boolean = iterator.hasNext

  override def next(): ArrayWritable = iterator.next()

  override def close(): Unit = {
    if (reader != null) {
      reader.close()
    }
  }
} 
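A construction sketch, assuming Spark 2.x's PartitionedFile and that the Hadoop configuration already carries whatever HadoopOffice options the workbook needs; each ArrayWritable holds the cells of one row:

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile
import org.zuinnote.spark.office.excel.HadoopFileExcelReader

object ExcelReaderSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    val file = PartitionedFile(InternalRow.empty, "hdfs:///data/report.xlsx", 0L, 64L * 1024L) // hypothetical
    val reader = new HadoopFileExcelReader(file, conf)
    try reader.foreach(row => println(row.get().mkString(" | ")))
    finally reader.close()
  }
}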
Example 7
Source File: OsmRecordReader.scala    From magellan   with Apache License 2.0
package magellan.mapreduce

import magellan.io.{OsmKey, OsmShape, OsmNode, OsmWay, OsmRelation}
import org.apache.hadoop.mapreduce.lib.input.FileSplit
import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext}
import scala.xml.{XML, Elem, Node}

private[magellan] class OsmRecordReader
  extends RecordReader[OsmKey, OsmShape] {
 
  val definedNodeLabels = Set("node", "way", "relation")
  var nodes : Seq[Node] = _
  
  var current : Int = 0
  lazy val total = nodes.length
  
  override def initialize(genericSplit: InputSplit, context: TaskAttemptContext) : Unit = {
    val split: FileSplit = genericSplit.asInstanceOf[FileSplit]
    val job = MapReduceUtils.getConfigurationFromContext(context)
    
    val file = split.getPath()
    val fs = file.getFileSystem(job)
    val fileIn = fs.open(file)
    
    val doc = XML.load(fileIn)
    fileIn.close()
    nodes = doc.child.filter(n => definedNodeLabels contains n.label)
  }
  
  override def nextKeyValue() : Boolean = {
    if (!nodes.isEmpty) { 
      if (current != 0) nodes = nodes.tail
      current += 1
    }
    !nodes.isEmpty
  }
  
  override def getCurrentKey() : OsmKey = {
    val current = nodes.head
    new OsmKey(current.label, (current \ "@id").text)
  }
  
  def getTags(shape: Node) = {
    (shape \ "tag").map(t => (t \ "@k").text -> (t \ "@v").text).toMap
  }
  
  def getOsmNode(shape: Node) = {
    new OsmNode(
        (shape \ "@id").text,
        (shape \ "@lat").text.toDouble,
        (shape \ "@lon").text.toDouble,
        getTags(shape))
  }
  
  def getOsmWay(shape: Node) = {
    new OsmWay((shape \ "@id").text, (shape \ "nd").map(w => (w \ "@ref").text), getTags(shape))
  }
  
  def getOsmRelation(shape: Node) = {
    new OsmRelation(
        (shape \ "@id").text,
        (shape \ "member").map(r => (r \ "@ref").text), getTags(shape)
    )
  }
  
  override def getCurrentValue() : OsmShape = {
    val current = nodes.head
    current.label match {
      case "node" => getOsmNode(current)
      case "way" => getOsmWay(current)
      case "relation" => getOsmRelation(current)
    }
  }
  
  override def getProgress() : Float = {
    current.toFloat / total
  }
  
  override def close() : Unit = { }
} 
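magellan pairs this reader with its own input format; a hypothetical wrapper is sketched below. It has to sit in the magellan.mapreduce package (the reader is private[magellan]) and must disable splitting, because initialize() parses the whole XML document:

package magellan.mapreduce

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import magellan.io.{OsmKey, OsmShape}

// Hypothetical wrapper, not magellan's actual OSM input format.
class OsmXmlInputFormat extends FileInputFormat[OsmKey, OsmShape] {
  override def isSplitable(context: JobContext, filename: Path): Boolean = false
  override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[OsmKey, OsmShape] =
    new OsmRecordReader()
}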
Example 8
Source File: ShapefileReader.scala    From magellan   with Apache License 2.0
package magellan.mapreduce

import java.io.DataInputStream

import org.apache.commons.io.EndianUtils
import org.apache.hadoop.mapreduce.lib.input.FileSplit
import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext}

import magellan.io.{ShapeKey, ShapeWritable}

private[magellan] class ShapefileReader extends RecordReader[ShapeKey, ShapeWritable] {

  private val key: ShapeKey = new ShapeKey()

  private var value: ShapeWritable = _

  private var dis: DataInputStream = _

  private var remaining: BigInt = _

  override def getProgress: Float = 0

  override def nextKeyValue(): Boolean = {
    if (remaining <= 0) {
      false
    } else {
      // record header has fixed length of 8 bytes
      // byte 0 = record #, byte 4 = content length
      val recordNumber = dis.readInt()
      // record numbers begin at 1
      require(recordNumber > 0)
      val contentLength = 2 * (dis.readInt() + 4)
      value.readFields(dis)
      remaining -= contentLength
      key.setRecordIndex(key.getRecordIndex() + 1)
      true
    }
  }

  override def getCurrentValue: ShapeWritable = value

  override def initialize(inputSplit: InputSplit, taskAttemptContext: TaskAttemptContext) {
    val split = inputSplit.asInstanceOf[FileSplit]
    val job = MapReduceUtils.getConfigurationFromContext(taskAttemptContext)

    val path = split.getPath()
    val fs = path.getFileSystem(job)
    val is = fs.open(path)

    val (start, end) = {
      val v = split.getStart
      if (v == 0) {
        is.seek(24)
        (100L, 2 * is.readInt().toLong)
      } else {
        (v, v + split.getLength)
      }
    }

    is.seek(start)
    dis = new DataInputStream(is)
    key.setFileNamePrefix(split.getPath.getName.split("\\.")(0))
    value = new ShapeWritable()
    remaining = (end - start)
  }

  override def getCurrentKey: ShapeKey = key

  override def close(): Unit = dis.close()

} 
Example 9
Source File: WholeFileReader.scala    From magellan   with Apache License 2.0
package magellan.mapreduce

import java.io.InputStream

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory, Decompressor}
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.FileSplit
import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext}

class WholeFileReader extends RecordReader[NullWritable, Text] {

  private val key = NullWritable.get()
  private val value = new Text()
  private var split: FileSplit = _
  private var conf: Configuration = _
  private var path: Path = _
  private var done: Boolean = false

  override def getProgress: Float = ???

  override def nextKeyValue(): Boolean = {
    if (done){
      false
    } else {
      val fs = path.getFileSystem(conf)
      var is: FSDataInputStream = null
      var in: InputStream = null
      var decompressor: Decompressor = null
      try {
        is = fs.open(split.getPath)
        val codec = new CompressionCodecFactory(conf).getCodec(path)
        if (codec != null) {
          decompressor = CodecPool.getDecompressor(codec)
          in = codec.createInputStream(is, decompressor)
        } else {
          in = is
        }
        val result = IOUtils.toByteArray(in)
        value.clear()
        value.set(result)
        done = true
        true
      } finally {
        if (in != null) {
          IOUtils.closeQuietly(in)
        }
        if (decompressor != null) {
          CodecPool.returnDecompressor(decompressor)
        }
      }
    }
  }

  override def getCurrentValue: Text = value

  override def initialize(inputSplit: InputSplit,
    taskAttemptContext: TaskAttemptContext): Unit = {
    this.split = inputSplit.asInstanceOf[FileSplit]
    this.conf = MapReduceUtils.getConfigurationFromContext(taskAttemptContext)
    this.path = this.split.getPath
  }

  override def getCurrentKey: NullWritable = key

  override def close() {}
} 
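The reader needs an input format that hands it whole, unsplit files; a hypothetical wrapper (not part of magellan's public API) might look like this:

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import magellan.mapreduce.WholeFileReader

// Hypothetical: one record per file, value = the file's (decompressed) contents.
class WholeFileInputFormat extends FileInputFormat[NullWritable, Text] {
  override def isSplitable(context: JobContext, filename: Path): Boolean = false
  override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[NullWritable, Text] =
    new WholeFileReader()
}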
Example 10
Source File: HadoopFileLinesReader.scala    From sparkoscope   with Apache License 2.0
The code is identical to the HadoopFileLinesReader shown in Example 1.
Example 11
Source File: HadoopFileLinesReader.scala    From multi-tenancy-spark   with Apache License 2.0
The code is identical to the HadoopFileLinesReader shown in Example 1.
Example 12
Source File: HadoopFileLinesReader.scala    From Spark-2.3.1   with Apache License 2.0
The code is identical to the HadoopFileLinesReader shown in Example 1.
Example 13
Source File: HadoopLineIterator.scala    From glow   with Apache License 2.0
package io.projectglow.sql.util

import java.io.Closeable
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.{FileSplit, LineRecordReader}
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
import org.apache.spark.sql.execution.datasources.RecordReaderIterator

import io.projectglow.common.GlowLogging


class HadoopLineIterator(
    path: String,
    start: Long,
    length: Long,
    lineSeparator: Option[Array[Byte]],
    conf: Configuration)
    extends Iterator[Text]
    with Closeable
    with GlowLogging {

  private val iterator = {
    val fileSplit = new FileSplit(
      new Path(new URI(path)),
      start,
      length,
      // TODO: Implement Locality
      Array.empty
    )
    val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
    val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId)

    val reader = lineSeparator match {
      case Some(sep) => new LineRecordReader(sep)
      // If the line separator is `None`, it covers `\r`, `\r\n` and `\n`.
      case _ => new LineRecordReader()
    }

    reader.initialize(fileSplit, hadoopAttemptContext)
    new RecordReaderIterator(reader)
  }

  override def hasNext: Boolean = {
    iterator.hasNext
  }

  override def next(): Text = {
    iterator.next()
  }

  override def close(): Unit = {
    iterator.close()
  }
}
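A usage sketch with a hypothetical path and byte range; passing None for the separator falls back to the default \n / \r / \r\n handling noted in the code:

import org.apache.hadoop.conf.Configuration
import io.projectglow.sql.util.HadoopLineIterator

object LineIteratorSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    val it = new HadoopLineIterator("hdfs:///data/sample.vcf", 0L, 1024L * 1024L, None, conf)
    try it.foreach(line => println(line.toString))
    finally it.close()
  }
}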