Java Code Examples for au.com.bytecode.opencsv.CSVReader#readNext()

The following examples show how to use au.com.bytecode.opencsv.CSVReader#readNext(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: CSVConfig.java    From micro-integrator with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the header row of the CSV source.
 *
 * @return the fields produced by the first {@code readNext()} call on a reader created for
 *         {@code getHeaderRow() - 1}, or {@code null} when {@code isHasHeader()} is false
 * @throws IOException      if creating the reader or reading the row fails
 * @throws DataServiceFault declared by the signature; raised by helpers outside this view
 */
private String[] getHeader() throws IOException, DataServiceFault {
    String[] headerFields = null;

    if (this.isHasHeader()) {
        CSVReader headerReader = null;
        try {
            headerReader = this.createCSVReader(this.getHeaderRow() - 1);
            headerFields = headerReader.readNext();
        } finally {
            // A failure while closing is only logged so it never hides the read result.
            if (headerReader != null) {
                try {
                    headerReader.close();
                } catch (IOException closeFailure) {
                    log.error("Error in closing CSV reader", closeFailure);
                }
            }
        }
    }

    return headerFields;
}
 
Example 2
Source File: CVSRemoteFileFormatter.java    From AIDR with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Reads image-clicker input records from a CSV file.
 * <p>
 * Only rows with more than eight columns are converted; columns 8, 1, 0, 5, 6, 7 and 2 are
 * passed (in that order) to the {@code MicromapperInput} constructor. When more than one
 * record was collected, the first one is dropped as the header.
 *
 * @param csvFilename path of the CSV file to read
 * @return the collected inputs, possibly empty
 * @throws Exception if the file cannot be opened or read
 */
public List<MicromapperInput> getFileBaseImageClickerInputData(String csvFilename) throws Exception{
    //[Twitter username] // [Tweet message] // [optional: time-stamp] // [optional: location] // [optional: latitude] // [optional: longitude] // [image link]

    List<MicromapperInput> sourceSet = new ArrayList<MicromapperInput>();

    CSVReader csvReader = new CSVReader(new FileReader(csvFilename));
    try {
        String[] row;
        while ((row = csvReader.readNext()) != null) {
            if (row.length > 8) {
                sourceSet.add(new MicromapperInput(row[8], row[1], row[0], row[5], row[6], row[7], row[2]));
            }
        }
    } finally {
        // Release the file handle even when reading fails part-way through.
        csvReader.close();
    }

    // Remove the header entry (only done when at least one further record follows it).
    if (sourceSet.size() > 1) {
        sourceSet.remove(0);
    }

    return sourceSet;
}
 
Example 3
Source File: CSVModel.java    From datasync with MIT License 6 votes vote down vote up
/**
 * Inserts up to {@code rowsToSample} rows from the control file's CSV into this model.
 *
 * @param controlFile control file describing the CSV; its {@code skip} setting is passed to
 *                    {@code getCSVReader}
 * @return the number of rows inserted
 * @throws IOException if reading the CSV fails
 */
private int addSamples(ControlFile controlFile) throws IOException{
    // NOTE(review): assumes getCSVReader hands back a fresh reader owned by this call — confirm.
    CSVReader reader = getCSVReader(controlFile, controlFile.getFileTypeControl().skip);
    try {
        int rowsAdded = 0;
        String[] row = reader.readNext();
        while (row != null && rowsAdded < rowsToSample) {
            // The consumers of this class assume a table with an equal number of columns in every row.
            // If the row is blank, we'll need to get a placeholder with as many columns as the others to allow the
            // control file editor the ability to load.
            if (isBlankRow(row)) {
                insertData(getBlankPlaceholderRow(getColumnCount()));
            } else {
                insertData(row);
            }
            rowsAdded++;
            row = reader.readNext();
        }
        return rowsAdded;
    } finally {
        // The reader was never closed before; release it on every path.
        reader.close();
    }
}
 
Example 4
Source File: OpenCSVParserExample.java    From journaldev with MIT License 6 votes vote down vote up
/**
 * Parses {@code employees.csv} row by row into {@code Employee} objects.
 * <p>
 * The first row is skipped as the header. Columns 0-3 of every following row are used as
 * id, name, role and salary. The resulting list is also printed to standard output.
 *
 * @return the parsed employees, in file order
 * @throws IOException if the file cannot be opened or read
 */
private static List<Employee> parseCSVFileLineByLine() throws IOException {
	List<Employee> emps = new ArrayList<Employee>();

	//create CSVReader object
	CSVReader reader = new CSVReader(new FileReader("employees.csv"), ',');
	try {
		//skip header row
		reader.readNext();

		//read line by line
		String[] record;
		while ((record = reader.readNext()) != null) {
			Employee emp = new Employee();
			emp.setId(record[0]);
			emp.setName(record[1]);
			emp.setRole(record[2]);
			emp.setSalary(record[3]);
			emps.add(emp);
		}
	} finally {
		// close the file even when a row cannot be read or is too short
		reader.close();
	}

	System.out.println(emps);
	return emps;
}
 
Example 5
Source File: CsvReaderUtils.java    From collect-earth with MIT License 6 votes vote down vote up
/**
 * Decides from the first non-empty row whether the reader splits the file into columns.
 * <p>
 * Rows consisting of a single blank field are skipped. The first other row decides the
 * result: it must have at least three fields.
 *
 * @param csvReader reader to probe; rows are consumed from it and it is not closed here
 * @return {@code true} if the first non-empty row has three or more fields, {@code false} otherwise
 * @throws IOException              if reading a row fails
 * @throws IllegalArgumentException if every row is empty (or there are no rows)
 */
private static boolean checkCsvReaderWorks(CSVReader csvReader) throws IOException {
	for (String[] fields = csvReader.readNext(); fields != null; fields = csvReader.readNext()) {
		boolean blankLine = fields.length == 1 && fields[0].trim().length() == 0;
		if (blankLine) {
			// This would be an empty line
			continue;
		}
		// A single non-blank field, or fewer than three fields, means the separator did not match.
		return fields.length >= 3;
	}

	// If the script reaches this point it means that all the lines in the CSV file were empty!
	throw new IllegalArgumentException("The CSV/CED plot file has no data! All the lines are empty!");
}
 
Example 6
Source File: Evaluator.java    From Siamese with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reads method-clone records from a CSV file.
 * <p>
 * The reader uses {@code ','} as separator, {@code '"'} as quote character and a line-skip
 * argument of 1. Only rows with exactly three fields become a {@code MethodClone}. An
 * {@code IOException} is printed and whatever was collected so far is returned.
 *
 * @param csvFile path of the CSV file
 * @return the clones read from the file, possibly empty
 */
protected ArrayList<MethodClone> readCSV(String csvFile) {
    ArrayList<MethodClone> clones = new ArrayList<MethodClone>();
    CSVReader reader = null;
    try {
        /* copied from http://howtodoinjava.com/3rd-party/parse-read-write-csv-files-opencsv-tutorial/ */
        reader = new CSVReader(new FileReader(csvFile), ',', '"', 1);
        //Read CSV line by line and use the string array as you want
        String[] nextLine;
        while ((nextLine = reader.readNext()) != null) {
            //Verifying the read data here
            if (nextLine.length == 3) {
                // create a clone method and add it to the list
                clones.add(new MethodClone(nextLine[0], nextLine[1], nextLine[2]));
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // The reader used to be left open; close it on every path.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return clones;
}
 
Example 7
Source File: transCorrelationQtl.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a tab-separated genotype-to-expression mapping file.
 * <p>
 * Column 0 of each line is used as the genotype id (key) and column 1 as the expression id
 * (value). A later line with the same genotype id replaces the earlier value.
 *
 * @param gtePath path of the mapping file, decoded with {@code ENCODING}
 * @return an unmodifiable map from genotype id to expression id
 * @throws IOException if the file cannot be opened or read
 */
public static Map<String, String> readGte(final String gtePath) throws IOException {
	final HashMap<String, String> genotypeToExpression = new HashMap<>();
	// try-with-resources: the reader (and the underlying stream) used to be left open
	try (CSVReader gteFileReader = new CSVReader(new InputStreamReader(new FileInputStream(gtePath), ENCODING), '\t', '\0', 0)) {
		String[] nextLine;
		while ((nextLine = gteFileReader.readNext()) != null) {
			final String genotypeId = nextLine[0];
			final String expressionId = nextLine[1];

			genotypeToExpression.put(genotypeId, expressionId);
		}
	}
	return Collections.unmodifiableMap(genotypeToExpression);
}
 
Example 8
Source File: EvaluateCsv.java    From science-result-extractor with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a file-to-score mapping from a CSV file.
 * <p>
 * The first row is skipped as the header. Column 0 of each following row is the key and
 * column 1 the value; a repeated key keeps the last value.
 *
 * @param inpScore CSV file to read
 * @return a mutable map from file name to score
 * @throws IOException if the file cannot be opened or read
 */
private static Map<String, String> loadFileScoreMap(File inpScore) throws IOException {
    Map<String, String> retMap = new HashMap<>();
    // try-with-resources closes the reader even when a row cannot be read or is too short
    try (CSVReader csvReader = new CSVReader(new FileReader(inpScore))) {
        csvReader.readNext();  // skip header
        String[] row;
        while ((row = csvReader.readNext()) != null) {
            String file = row[0];
            String score = row[1];
            retMap.put(file, score);
        }
    }
    return retMap;
}
 
Example 9
Source File: CorrespondenceSet.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Loads correspondences from a file and adds them to this correspondence
 * set. Can be called multiple times.
 * <p>
 * Each row is read as (record id, group key). Rows whose record id is not
 * found in {@code first} are counted and skipped; the count is logged once
 * after the file has been read.
 * 
 * @param correspondenceFile	the file to load from
 * @param first					the dataset that contains the records
 * @throws IOException			thrown if there is a problem loading the file
 */
public void loadCorrespondences(File correspondenceFile,
		FusibleDataSet<RecordType, SchemaElementType> first)
		throws IOException {
	int skipped = 0;

	CSVReader reader = new CSVReader(new FileReader(correspondenceFile));
	try {
		String[] values;

		while ((values = reader.readNext()) != null) {
			// check if the ids exist in the provided data sets
			if (first.getRecord(values[0]) == null) {
				skipped++;
				continue;
			}
			
			// we only have the records from the source data sets, so we group by the id in the target data set
			RecordGroup<RecordType, SchemaElementType> grp2 = recordIndex.get(values[1]);

			if (grp2 == null) {
				// no existing groups, create a new one
				RecordGroup<RecordType, SchemaElementType> grp = groupFactory.createRecordGroup();
				grp.addRecord(values[0], first);
				recordIndex.put(values[1], grp);
				groups.add(grp);
			} else {
				// one existing group, add to this group
				grp2.addRecord(values[0], first);
				recordIndex.put(values[0], grp2);
			}
		}
	} finally {
		// release the file handle even when a row cannot be read or is malformed
		reader.close();
	}
	
	if (skipped>0) {
		logger.error(String.format("Skipped %,d records (not found in provided dataset)", skipped));
	}
}
 
Example 10
Source File: CSVModel.java    From datasync with MIT License 5 votes vote down vote up
/**
 * Refreshes {@code columnNames} from the first row of the control file's CSV and fires a
 * table-structure-changed event.
 * <p>
 * With a header row the first row itself becomes the column names; otherwise placeholder
 * names are generated, one per field of the first row (none when the file has no rows).
 *
 * @param file control file describing the CSV
 * @throws IOException if reading the CSV fails
 */
private void updateColumnNames(ControlFile file) throws IOException {
    boolean hasHeaderRow = file.getFileTypeControl().hasHeaderRow;

    // NOTE(review): assumes getCSVReader hands back a fresh reader owned by this call — confirm.
    CSVReader headerReader = getCSVReader(file, 0);
    String[] row;
    try {
        row = headerReader.readNext();
    } finally {
        // The reader was never closed before; only the first row is needed.
        headerReader.close();
    }

    if (hasHeaderRow) {
        columnNames = row;
    }
    else{
        // readNext() yields null for an empty file; avoid the NullPointerException on row.length
        int columnCount = (row == null) ? 0 : row.length;
        columnNames = generatePlaceholderNames(columnCount);
    }
    fireTableStructureChanged();
}
 
Example 11
Source File: DataSet.java    From aifh with Apache License 2.0 5 votes vote down vote up
/**
 * Load a CSV from an input stream.
 * <p>
 * The first row supplies the headers. Rows with at most one field are skipped;
 * any other row must have exactly as many fields as there are headers. The
 * stream is closed when loading finishes, whether it succeeds or fails.
 *
 * @param is The input stream.
 * @return The loaded file.
 * @throws AIFHError if a row has an unexpected number of columns, or if reading
 *                   or closing the stream fails
 */
public static DataSet load(final InputStream is) {
    try {
        final CSVReader csv = new CSVReader(new InputStreamReader(is));
        try {
            final String[] headers = csv.readNext();
            final DataSet result = new DataSet(headers);

            String[] nextLine;
            while ((nextLine = csv.readNext()) != null) {
                if (nextLine.length <= 1) {
                    continue;
                } else if (nextLine.length != result.getHeaderCount()) {
                    throw new AIFHError("Found a CSV line with "
                            + nextLine.length + " columns, when expecting " + result.getHeaderCount());
                }
                final Object[] obj = new Object[result.getHeaderCount()];
                System.arraycopy(nextLine, 0, obj, 0, nextLine.length);
                result.add(obj);
            }
            return result;
        } finally {
            // Previously skipped whenever an AIFHError or IOException left the loop early.
            csv.close();
        }
    } catch (IOException ex) {
        throw (new AIFHError(ex));
    }
}
 
Example 12
Source File: SasFileReaderUnitTest.java    From parso with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the SAS file named by {@code fileName} to CSV row by row and compares the output,
 * in chunks of {@code COMPARE_ROWS_COUNT} rows, against the control CSV resource with the
 * same name. An {@code IOException} is logged rather than rethrown.
 */
@Test
public void testData() {
    long startedAt = System.currentTimeMillis();
    InputStream sasStream = getResourceAsStream(fileName);
    logger.info("Processing file {}", fileName);
    Writer writer = new StringWriter();
    String controlResource = fileName.toLowerCase().replace("sas7bdat", "csv");
    InputStreamReader controlStreamReader = new InputStreamReader(getResourceAsStream(controlResource));
    try {
        SasFileReader sasFileReader = new SasFileReaderImpl(sasStream);
        long rowCount = sasFileReader.getSasFileProperties().getRowCount();
        List<Column> columns = sasFileReader.getColumns();
        CSVReader controlReader = new CSVReader(controlStreamReader);
        CSVDataWriter csvDataWriter = new CSVDataWriterImpl(writer, ",", "\n", Locale.UK);

        // the control file starts with a header row that has no counterpart in the output
        controlReader.readNext();

        int rowIndex = 0;
        while (rowIndex < rowCount) {
            csvDataWriter.writeRow(sasFileReader.getColumns(), sasFileReader.readNext());
            boolean chunkComplete = rowIndex != 0 && rowIndex % COMPARE_ROWS_COUNT == 0;
            if (chunkComplete) {
                compareResultWithControl(controlReader, writer, rowIndex - COMPARE_ROWS_COUNT, columns);
                // start the next chunk with an empty buffer
                ((StringWriter) writer).getBuffer().setLength(0);
            }
            rowIndex++;
        }

        // compare whatever is left after the last full chunk
        int lastChunkStart = (int) (rowCount - rowCount % COMPARE_ROWS_COUNT);
        compareResultWithControl(controlReader, writer, lastChunkStart, columns);
        assertThat(controlReader.readNext()).isNull();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    } finally {
        closeWriter(writer);
        closeInputStream(sasStream);
        closeInputStreamReader(controlStreamReader);
    }
    logger.info("Time passed: {} ms", System.currentTimeMillis() - startedAt);
}
 
Example 13
Source File: ChangelogTestCase.java    From ontopia with Apache License 2.0 4 votes vote down vote up
/**
 * (Re)creates a table from a semicolon-separated CSV test file and optionally fills it.
 * <p>
 * The table is dropped first if it exists. The first CSV line supplies the column names,
 * every column being created as {@code varchar}. When {@code load_data} is true, each
 * further line is inserted as one row.
 *
 * @param stm       statement used to execute the DDL and the inserts
 * @param table     name of the table to (re)create
 * @param file      name of the CSV file, resolved below the "in/sync" test data directory
 * @param load_data whether to insert the data rows after creating the table
 * @throws IOException  if the CSV file cannot be read or has no header line
 * @throws SQLException if creating the table or inserting a row fails
 */
private static void importCSV(Statement stm, String table, String file,
                              boolean load_data)
  throws IOException, SQLException {
  // first, get rid of the table if it's already there
  try {
    stm.executeUpdate("drop table " + table);
  } catch (SQLException e) {
    // table wasn't there. never mind
  }
    
  // open the CSV file
  String csv = TestFileUtils.getTransferredTestInputFile(testdataDirectory, "in", "sync", file).getPath();
  CSVReader csvreader = new CSVReader(new FileReader(csv), ';', '"');
  try {
    // read the first line to get the column names
    String[] colnames = csvreader.readNext();
    if (colnames == null)
      throw new IOException("CSV file " + csv + " has no header line");

    String[] columndefs = new String[colnames.length];
    for (int ix = 0; ix < colnames.length; ix++)
      columndefs[ix] = colnames[ix] + " varchar";

    // now we can create the table
    stm.executeUpdate("create table " + table + " (" +
                      StringUtils.join(columndefs, ", ") + ")");

    // are we just creating the table, or should we load the data?
    if (!load_data)
      return;

    // ok, now insert the actual data
    String cols = StringUtils.join(colnames, ", ");
    String[] tuple = csvreader.readNext();
    while (tuple != null) {
      String[] values = new String[tuple.length];
      for (int ix = 0; ix < tuple.length; ix++)
        // double embedded single quotes so a value like O'Neil stays a valid SQL literal
        values[ix] = "'" + tuple[ix].replace("'", "''") + "'";
        
      stm.executeUpdate("insert into " + table + " (" + cols + ") values (" +
                        StringUtils.join(values, ", ") + ")");
        
      tuple = csvreader.readNext();
    }
  } finally {
    // the reader (and the file behind it) used to be left open on every path
    csvreader.close();
  }
}
 
Example 14
Source File: SasFileReaderUnitTest.java    From parso with Apache License 2.0 4 votes vote down vote up
/**
 * Reads only the columns x1, x5 and x8 from the SAS file named by {@code fileName}, writes
 * them as CSV, and checks every written row against columns 0, 4 and 7 of the control CSV
 * resource with the same name. An {@code IOException} is logged rather than rethrown.
 */
@Test
public void testPartialReadingOfColumns() {
    long startedAt = System.currentTimeMillis();
    InputStream sasStream = getResourceAsStream(fileName);
    logger.info("Processing file {}", fileName);
    Writer writer = new StringWriter();
    String controlResource = fileName.toLowerCase().replace("sas7bdat", "csv");
    InputStreamReader controlStreamReader = new InputStreamReader(getResourceAsStream(controlResource));

    List<String> columnNames = new ArrayList<String>();
    columnNames.add("x1");
    columnNames.add("x5");
    columnNames.add("x8");

    try {
        SasFileReader sasFileReader = new SasFileReaderImpl(sasStream);
        long rowCount = sasFileReader.getSasFileProperties().getRowCount();
        CSVReader controlReader = new CSVReader(controlStreamReader);
        CSVDataWriter csvDataWriter = new CSVDataWriterImpl(writer, ",", "\n", Locale.UK);

        // the control file starts with a header row that has no counterpart in the output
        controlReader.readNext();

        // first pass: write the selected columns of every row
        for (int written = 0; written < rowCount; written++) {
            csvDataWriter.writeRow(sasFileReader.getColumns(columnNames), sasFileReader.readNext(columnNames));
        }

        // second pass: parse the output again and compare it row by row with the control file
        CSVReader resultReader = new CSVReader(new StringReader(writer.toString()));
        for (int checked = 0; checked < rowCount; checked++) {
            String[] expected = controlReader.readNext();
            String[] actual = resultReader.readNext();
            assertThat(actual.length).isEqualTo(columnNames.size());
            assertThat(actual[0]).isEqualTo(expected[0]);
            assertThat(actual[1]).isEqualTo(expected[4]);
            assertThat(actual[2]).isEqualTo(expected[7]);
        }
        assertThat(controlReader.readNext()).isNull();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    } finally {
        closeWriter(writer);
        closeInputStream(sasStream);
        closeInputStreamReader(controlStreamReader);
    }
    logger.info("Time passed: {} ms", System.currentTimeMillis() - startedAt);
}
 
Example 15
Source File: CSVHandler.java    From gsn with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Parses CSV rows into stream elements that are newer than a checkpoint.
 * <p>
 * Each row is converted with {@code convertTo}; empty results are ignored. When the element
 * carries a {@code TIMESTAMP}, it is kept only if that timestamp is greater than
 * {@code previousCheckPoint}. Otherwise the checkpoint is treated as a line count and
 * already-seen lines are skipped. Reading stops once the result holds more than 250 elements.
 * The reader, and with it {@code datainput}, is closed before returning.
 *
 * @param datainput          source of the CSV text
 * @param previousCheckPoint last processed timestamp, or last processed line count
 * @return the newly read elements, possibly empty
 * @throws IOException if reading fails
 */
public ArrayList<TreeMap<String, Serializable>> parseValues(Reader datainput, long previousCheckPoint) throws IOException {
    ArrayList<TreeMap<String, Serializable>> toReturn = new ArrayList<TreeMap<String, Serializable>>();
    CSVReader reader = new CSVReader(datainput, getSeparator(), getStringSeparator(), getSkipFirstXLines());
    try {
        String[] values = null;
        long currentLine = 0;
        while ((values = reader.readNext()) != null) {
            TreeMap<String, Serializable> se = convertTo(formats, fields, getNulls(), values, getSeparator());
            if (isEmpty(se))
                continue;
            if (se.containsKey(TIMESTAMP)) {
                //System.out.println("times "+se.get(TIMESTAMP)+"--"+previousCheckPoint);
                if (((Long) se.get(TIMESTAMP)) <= previousCheckPoint)
                    continue;
            } else {// assuming useCounterForCheckPoint = true

                if (logger.isDebugEnabled()) {
                    String symbol = (currentLine < previousCheckPoint) ? " < " : " >= ";
                    logger.debug("currentLine=" + currentLine + symbol + "checkpoint=" + previousCheckPoint);
                }

                if (currentLine < previousCheckPoint) {// skipping already read lines, based on line count
                    logger.debug("skipping");
                    currentLine++;
                    continue;
                }

            }
            toReturn.add(se);
            currentLine++;
            loggedNoChange = false;
            if (toReturn.size() > 250)
                break; // Move outside the loop as in each call we only read 250 values;
        }
        if (logger.isDebugEnabled() && toReturn.size() == 0 && loggedNoChange == false) {
            logger.debug("There is no new item after most recent checkpoint(previousCheckPoint:" + new DateTime(previousCheckPoint) + ").");
            loggedNoChange = true;
        }
        return toReturn;
    } finally {
        // Previously skipped when readNext() or convertTo() threw.
        reader.close();
    }
}
 
Example 16
Source File: SurveySettingsService.java    From JDeSurvey with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Replaces all items of a data set with the rows read from a CSV reader.
 * <p>
 * Existing items of the data set are deleted first. The first imported row decides the
 * layout: with a single column the item values are generated from the running order number;
 * with more columns the first two are used, and the one whose trimmed content is longer on
 * that first row is taken as the text. Any exception is logged and rethrown wrapped in a
 * {@code RuntimeException}.
 *
 * @param csvReader      source of the rows; not closed here
 * @param datasetId      id of the data set whose items are replaced
 * @param ignoreFirstRow whether the first row read is a header to skip
 */
@Transactional(readOnly = false)
public void importDatasetItems (CSVReader csvReader, Long datasetId,Boolean ignoreFirstRow) {
	try {
		DataSet dataSet = dataSetDAO.findById(datasetId);
		dataSetItemDAO.deleteByDataSetId(datasetId);

		Integer order = 1;
		short valueFieldIndex = 0;
		short textFieldIndex = 1;
		Boolean autoGenerateValues = false;
		Boolean headerPending = ignoreFirstRow;

		for (String[] row = csvReader.readNext(); row != null; row = csvReader.readNext()) {
			if (headerPending) {
				// skip the header row once, then carry on with the data rows
				headerPending = false;
				continue;
			}

			if (order == 1) {
				// the first data row fixes the column layout for the whole import
				if (row.length == 1) {
					autoGenerateValues = true;
					textFieldIndex = 0;
				} else {
					autoGenerateValues = false;
					boolean firstColumnIsLonger = row[0].trim().length() > row[1].trim().length();
					if (firstColumnIsLonger) {
						// invert the indexes
						valueFieldIndex = 1;
						textFieldIndex = 0;
					}
				}
			}

			String itemValue = autoGenerateValues ? order.toString() : row[valueFieldIndex].trim();
			String itemText = row[textFieldIndex].trim();
			dataSetItemDAO.merge(new DataSetItem(dataSet, order, itemValue, itemText));
			order++;
		}
	}
	catch (Exception e) {
		log.error(e.getMessage(), e);
		throw new RuntimeException(e);
	}
}
 
Example 17
Source File: QueryBinaryInteraction.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads interaction queries from a tab-separated file with a header line.
 * <p>
 * The header is matched case-insensitively against "variant", "gene" and "covariate"; at
 * least one of them must be present and none may occur twice. For every following line the
 * fields of the detected columns are used and missing columns are passed as {@code null}.
 *
 * @param queryFile the query file to read
 * @return the queries in file order, paired with a flag that is {@code true} when neither
 *         a variant nor a gene column was found
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if reading fails
 * @throws Exception             if the file is empty, a column occurs twice, or no known
 *                               column is found in the header
 */
private static Pair<LinkedHashSet<InteractoinQuery>, Boolean> loadInteractionQueries(File queryFile) throws FileNotFoundException, IOException, Exception {

	LinkedHashSet<InteractoinQuery> interactionQueries = new LinkedHashSet<InteractoinQuery>();

	int variantCol = -1;
	int geneCol = -1;
	int covariateCol = -1;

	// try-with-resources: the header checks below throw, which used to leave the file open
	try (CSVReader queryReader = new CSVReader(new FileReader(queryFile), '\t', '\0')) {

		String[] nextLine = queryReader.readNext();
		if (nextLine == null) {
			// readNext() yields null for an empty file; report it instead of a NullPointerException
			throw new Exception("Query file is empty");
		}

		//Parse header
		for (int i = 0; i < nextLine.length; ++i) {
			String headerEntry = nextLine[i].toLowerCase();
			switch (headerEntry) {
				case "variant":
					if (variantCol != -1) {
						throw new Exception("Variant column found twice");
					}
					variantCol = i;
					break;
				case "gene":
					if (geneCol != -1) {
						throw new Exception("Gene column found twice");
					}
					geneCol = i;
					break;
				case "covariate":
					if (covariateCol != -1) {
						throw new Exception("Covariate column found twice");
					}
					covariateCol = i;
					break;

			}

		}

		if (variantCol == -1 && geneCol == -1 && covariateCol == -1) {
			throw new Exception("Did not detect appropiate header in query file");

		}

		while ((nextLine = queryReader.readNext()) != null) {
			String variant = null;
			String gene = null;
			String covariate = null;

			if (variantCol != -1) {
				variant = nextLine[variantCol];
			}
			if (geneCol != -1) {
				gene = nextLine[geneCol];
			}
			if (covariateCol != -1) {
				covariate = nextLine[covariateCol];
			}
			interactionQueries.add(new InteractoinQuery(variant, gene, covariate));
		}
	}

	return new Pair<LinkedHashSet<InteractoinQuery>, Boolean>(interactionQueries, variantCol == -1 && geneCol == -1);
}
 
Example 18
Source File: DatasetUtils.java    From datasync with MIT License 4 votes vote down vote up
/**
 * Downloads the first rows of a dataset as CSV and returns them filtered to known columns.
 * <p>
 * The request goes to {@code https://<domain>/resource/<dataset id>.csv} with
 * {@code $limit=rowsToSample}. Only the columns whose header matches a field name of the
 * dataset are kept. The first entry of the result is the filtered header row.
 *
 * @param userPrefs    preferences supplying the domain and the HTTP configuration
 * @param dataset      dataset to sample
 * @param rowsToSample value sent as the {@code $limit} parameter
 * @return header row followed by the data rows; empty when the response has no body or no rows
 * @throws URISyntaxException if the request URI cannot be built
 * @throws IOException        if the request or the CSV parsing fails
 * @throws HttpException      declared by the signature; raised by helpers outside this view
 */
public static List<List<String>> getDatasetSample(UserPreferences userPrefs, Dataset dataset, int rowsToSample) throws URISyntaxException, IOException, HttpException {
    String justDomain = getDomainWithoutScheme(userPrefs);
    URI absolutePath = new URIBuilder()
        .setScheme("https")
        .setHost(justDomain)
        .setPath("/resource/" + dataset.getId() + ".csv")
        .addParameter("$limit",""+rowsToSample)
        .build();

    ResponseHandler<String> handler = new ResponseHandler<String>() {
        @Override
        public String handleResponse(
            final HttpResponse response) throws ClientProtocolException, IOException {
            StatusLine statusLine = response.getStatusLine();
            int status = statusLine.getStatusCode();
            if (status >= 200 && status < 300) {
                HttpEntity entity = response.getEntity();
                return entity != null ? EntityUtils.toString(entity) : null;
            } else {
                throw new ClientProtocolException(statusLine.toString());
            }
        }
    };

    HttpUtility util = new HttpUtility(userPrefs, true);
    String sample;
    try {
        sample = util.get(absolutePath, "application/csv", handler);
    } finally {
        // release the HTTP client even when the request fails
        util.close();
    }

    List<List<String>> results = new ArrayList<>();
    if (sample == null) {
        // the handler returns null for a response without an entity
        return results;
    }

    Set<String> expectedFieldNames = new HashSet<String>();
    for(Column c : dataset.getColumns()) {
        expectedFieldNames.add(c.getFieldName());
    }

    try (CSVReader reader = new CSVReader(new StringReader(sample))) {
        String[] row = reader.readNext();
        if (row == null) {
            // empty body: no header row, hence nothing to filter
            return results;
        }

        // decide once, from the header, which column positions to keep
        boolean[] keep = new boolean[row.length];
        for(int i = 0; i != row.length; ++i) {
            keep[i] = expectedFieldNames.contains(row[i]);
        }
        results.add(filter(keep, row));

        while((row = reader.readNext()) != null) {
            results.add(filter(keep, row));
        }
    }

    return results;
}
 
Example 19
Source File: OxfordSampleFile.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the space-separated sample file (decoded as UTF-8) into {@code samples}.
 * <p>
 * The first line supplies the column names and the second line is skipped. On every
 * following line, field 0 is the family id, field 1 the sample id and field 2 the missing
 * rate. The remaining annotations are looked up by column name and converted according to
 * the type of their {@code SampleAnnotation}.
 *
 * @throws IOException if the file cannot be opened or read
 */
private void loadSamples() throws IOException {
	CSVReader sampleReader = new CSVReader(new InputStreamReader(new FileInputStream(sampleFile), Charset.forName("UTF-8")), ' ');
	try {
		// map every column name to its position
		Map<String, Integer> columnPositions = new HashMap<String, Integer>();
		String[] header = sampleReader.readNext();
		int position = 0;
		for (String columnName : header) {
			columnPositions.put(columnName, position);
			++position;
		}

		// skip datatypes row
		sampleReader.readNext();

		for (String[] fields = sampleReader.readNext(); fields != null; fields = sampleReader.readNext()) {

			String familyId = fields[0];
			String sampleId = fields[1];

			Map<String, Object> annotationValues = new LinkedHashMap<String, Object>();

			float missingRate;
			if (fields[2].equals("NA")) {
				missingRate = Float.NaN;
			} else {
				missingRate = Float.parseFloat(fields[2]);
			}
			annotationValues.put(SAMPLE_MISSING_RATE_FLOAT, missingRate);

			for (String colName : sampleAnnotations.keySet()) {

				if (colName.equals(SAMPLE_MISSING_RATE_FLOAT)) {
					// the missing rate was stored above
					continue;
				}

				SampleAnnotation annotation = sampleAnnotations.get(colName);
				String token = fields[columnPositions.get(colName)];

				Object value = null;
				switch (annotation.getType()) {
					case STRING:
						value = token;
						break;
					case INTEGER:
						if (!token.equals("NA")) {
							value = Integer.valueOf(token);
						}
						break;
					case BOOLEAN:
						// "-9" and "NA" both stand for a missing value
						if (!(token.equals("-9") || token.equals("NA"))) {
							value = token.equals("1");
						}
						break;
					case FLOAT:
						if (token.equals("NA") || token.equals("-9")) {
							value = Float.NaN;
						} else {
							value = Float.parseFloat(token);
						}
						break;
					default:
						LOGGER.warn("Unsupported data type encountered for column [" + colName + "]");
				}

				annotationValues.put(colName, value);
			}

			samples.add(new Sample(sampleId, familyId, annotationValues));
		}
	} finally {
		IOUtils.closeQuietly(sampleReader);
	}
}
 
Example 20
Source File: CorrespondenceSet.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Loads correspondences from a file and adds them to this correspondence
 * set. Can be called multiple times.
 * <p>
 * Each row is read as (id in first dataset, id in second dataset). Rows
 * that reference an unknown record are logged and skipped. The two ids are
 * put into the same record group, creating, extending or merging groups
 * as needed.
 * 
 * @param correspondenceFile	the file to load from
 * @param first					the dataset that contains the records on the left-hand side of the correspondences
 * @param second				the dataset that contains the records on the right-hand side of the correspondences
 * @throws IOException			thrown if there is a problem loading the file
 */
public void loadCorrespondences(File correspondenceFile,
                                   FusibleDataSet<RecordType, SchemaElementType> first, FusibleDataSet<RecordType, SchemaElementType> second)
		throws IOException {
	CSVReader reader = new CSVReader(new FileReader(correspondenceFile));
	try {
		String[] values;

		while ((values = reader.readNext()) != null) {
			// check if the ids exist in the provided datasets
			if (first.getRecord(values[0]) == null) {
				logger.error(String.format(
						"Record %s not found in first dataset", values[0]));
				continue;
			}
			if (second.getRecord(values[1]) == null) {
				// report the id that was actually looked up in the second dataset
				logger.error(String.format(
						"Record %s not found in second dataset", values[1]));
				continue;
			}

			// check if the ids already belong to any groups
			RecordGroup<RecordType, SchemaElementType> grp1 = recordIndex.get(values[0]);
			RecordGroup<RecordType, SchemaElementType> grp2 = recordIndex.get(values[1]);

			if (grp1 == null && grp2 == null) {
				// no existing groups, create a new one
				RecordGroup<RecordType, SchemaElementType> grp = groupFactory.createRecordGroup();
				grp.addRecord(values[0], first);
				grp.addRecord(values[1], second);
				recordIndex.put(values[0], grp);
				recordIndex.put(values[1], grp);
				groups.add(grp);
			} else if (grp1 != null && grp2 == null) {
				// one existing group, add to this group
				grp1.addRecord(values[1], second);
				recordIndex.put(values[1], grp1);
			} else if (grp1 == null && grp2 != null) {
				// one existing group, add to this group
				grp2.addRecord(values[0], first);
				recordIndex.put(values[0], grp2);
			} else {
				// two existing groups, merge
				grp1.mergeWith(grp2);

				// re-point every id of the absorbed group to the surviving one
				for (String id : grp2.getRecordIds()) {
					recordIndex.put(id, grp1);
				}
			}
		}
	} finally {
		// release the file handle even when a row cannot be read or is malformed
		reader.close();
	}
}