java source code of ExcelParser

package com.codingchili.Model;

import com.codingchili.logging.ApplicationLogger;
import io.vertx.core.json.JsonObject;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.reactivestreams.*;

import java.io.*;
import java.util.*;
import java.util.function.Consumer;

/**
 * @author Robin Duda
 * <p>
 * Parses xlsx files into json objects.
 */
public class ExcelParser implements FileParser {
    public static final String INDEX = "index";
    private static final String OOXML = ".xlsx";
    private static final String XML97 = ".xls";
    private ApplicationLogger logger = new ApplicationLogger(getClass());
    private String fileName;
    private File file;
    private Workbook workbook;
    private Sheet sheet;
    private int columns;
    private int offset;
    private int rows;

    @Override
    public void setFileData(String localFileName, int offset, String fileName)
            throws ParserException, FileNotFoundException {

        file = new File(localFileName);
        offset -= 1; // convert excel row number to 0-based index.

        if (file.exists()) {
            try {
                this.workbook = getWorkbook(file, fileName);
                this.sheet = workbook.getSheetAt(0);
                this.offset = offset;
                this.fileName = fileName;
            } catch (Exception e) {
                if (e instanceof ParserException) {
                    throw (ParserException) e;
                } else {
                    throw new ParserException(e);
                }
            }
        } else {
            throw new FileNotFoundException(file.getAbsolutePath());
        }
    }

    @Override
    public Set<String> getSupportedFileExtensions() {
        return new HashSet<>(Arrays.asList(OOXML, XML97));
    }

    /**
     * Returns a workbook implementation based on the extension of the filname.
     *
     * @param file     stream representing a workbook
     * @param fileName the filename to determine a specific workbook implementation
     * @return a workbook implentation that supports the given file format
     * @throws ParserException when the file extension is unsupported
     * @throws IOException     when the given data is not a valid workbook
     */
    private Workbook getWorkbook(File file, String fileName) throws ParserException, IOException {
        if (fileName.endsWith(OOXML)) {
            try {
                return new XSSFWorkbook(file);
            } catch (InvalidFormatException e) {
                throw new ParserException(e);
            }
        } else if (fileName.endsWith(XML97)) {
            return new HSSFWorkbook(new FileInputStream(file));
        } else {
            throw new ParserException(
                    String.format("Unrecognized file extension for file %s, expected %s or %s.",
                            fileName, OOXML, XML97));
        }
    }

    @Override
    public void initialize() {
        logger.parsingFile(fileName, offset);

        this.columns = getColumnCount(sheet.getRow(offset));
        this.rows = getItemCount(sheet, offset);

        // parse all rows.
        readRows((json) -> {
            // skip storing the results of the parse.
        }, offset, rows, true);

        logger.parsedFile(rows - 1, fileName);
    }

    @Override
    public void subscribe(Subscriber<? super JsonObject> subscriber) {
        subscriber.onSubscribe(new Subscription() {
            private int index = 0;

            @Override
            public void request(long count) {
                readRows(subscriber::onNext, index, count, false);
                index += count;

                if (index >= rows) {
                    subscriber.onComplete();
                }
            }

            @Override
            public void cancel() {
                // send no more items!
            }
        });
    }

    @Override
    public void free() {
        try {
            workbook.close();
        } catch (IOException e) {
            logger.onError(e);
        }
    }

    @Override
    public int getNumberOfElements() {
        return rows;
    }

    /**
     * Reads the given range of rows and converts it to json.
     *
     * @param start    the starting element, 0 represents the first row after the row with the column titles.
     * @param count    the number of elements to read - can never read past the max number of rows.
     * @param consumer called with the produced JSON object for each parsed row.
     */
    private void readRows(Consumer<JsonObject> consumer, int start, long count, boolean dryRun) {
        String[] columns = getColumns(sheet.getRow(offset));

        for (int i = start; i < (count + start) && i < rows; i++) {
            consumer.accept(getRow(columns, sheet.getRow(i + offset + 1), dryRun));
        }
    }

    /**
     * retrieves the values of the column titles.
     *
     * @param row that points to the column titles.
     * @return an array of the titles
     */
    private String[] getColumns(Row row) {
        String[] titles = new String[columns];

        for (int i = 0; i < titles.length; i++) {
            titles[i] = row.getCell(i).getStringCellValue();
        }
        return titles;
    }

    /**
     * Returns the number of columns present on the given row.
     *
     * @param row the row to read column count from.
     * @return the number of columns on the given row
     */
    private int getColumnCount(Row row) {
        DataFormatter formatter = new DataFormatter();
        Iterator<Cell> iterator = row.iterator();
        int count = 0;

        while (iterator.hasNext()) {
            Cell cell = iterator.next();
            String value = formatter.formatCellValue(cell);

            if (value.length() > 0) {
                count++;
            } else {
                break;
            }
        }
        return count;
    }

    /**
     * counts the number of rows to be imported taking into account the offset
     * of the title columns.
     *
     * @param sheet  the sheet to read items from
     * @param offset the offset of the title columns
     * @return the number of rows minus the column title offset.
     */
    private int getItemCount(Sheet sheet, int offset) {
        int count = 0;
        Row row = sheet.getRow(offset + 1);

        while (row != null) {
            count++;
            row = sheet.getRow(offset + 1 + count);
        }

        return count;
    }

    /**
     * retrieves a row as a json object.
     *
     * @param titles the titles of the row.
     * @param row    the row to read values from.
     * @param dryRun if true no results will be generated and this method returns null.
     * @return a jsonobject that maps titles to the column values.
     */
    private JsonObject getRow(String[] titles, Row row, boolean dryRun) {
        DataFormatter formatter = new DataFormatter();
        JsonObject json = null;
        int index = 0;

        if (!dryRun) {
            json = new JsonObject();
        }

        for (int i = 0; i < row.getLastCellNum(); i++) {
            Cell cell = row.getCell(i);
            Object value = null;

            if (cell != null) {
                switch (cell.getCellTypeEnum()) {
                    case STRING:
                        value = formatter.formatCellValue(cell);
                        break;
                    case NUMERIC:
                        if (DateUtil.isCellDateFormatted(cell)) {
                            value = cell.getDateCellValue().toInstant().toString();
                        } else {
                            value = cell.getNumericCellValue();
                        }
                        break;
                }
                // avoid indexing null or empty string, fails to index rows
                // when date fields are empty and can lead to mappings being
                // set up incorrectly if leading rows has missing data.
                if (!dryRun && value != null && !(value.toString().length() == 0)) {
                    json.put(titles[index], value);
                }
            }
            index++;
        }
        return json;
    }
}