org.apache.commons.csv.CSVRecord Java Examples

The following examples show how to use org.apache.commons.csv.CSVRecord. They are taken from open source projects; the source file, project, and license are listed above each example so you can view the code in its original context.
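Before looking at the project examples below, here is a minimal, self-contained sketch (not taken from any of the projects listed) of the basic pattern most of them share: build a CSVFormat, parse a Reader into a CSVParser, and iterate the resulting CSVRecord objects, reading values either by header name or by zero-based index.

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordBasics {
  public static void main(String[] args) throws Exception {
    String csv = "name,age\nalice,30\nbob,25\n";
    // CSVParser implements Closeable and Iterable<CSVRecord>
    try (Reader in = new StringReader(csv);
         CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in)) {
      for (CSVRecord record : parser) {
        String name = record.get("name");          // access by header name
        int age = Integer.parseInt(record.get(1)); // access by zero-based index
        System.out.println(name + " is " + age);
      }
    }
  }
}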
Example #1
Source File: SymSpellSearchBenchMark.java    From customized-symspell with MIT License
private List<String> readQueries(String queryFile) {
  List<String> queries = new ArrayList<>();
  try {
    URL queryResourceUrl = this.getClass().getClassLoader().getResource(queryFile);
    // try-with-resources closes the parser and its underlying stream
    try (CSVParser qparser = CSVParser
        .parse(queryResourceUrl, Charset.forName("UTF-8"),
            CSVFormat.DEFAULT.withDelimiter(' '))) {
      // CSVParser is Iterable, so records can be read with a for-each loop
      for (CSVRecord csvRecord : qparser) {
        queries.add(csvRecord.get(0));
      }
    }
  } catch (IOException ex) {
    System.err.println("Error occurred " + ex);
  }
  return queries;
}
 
Example #2
Source File: ExCommentCSVReader.java    From repositoryminer with Apache License 2.0
private void readComments() throws IOException {
	List<CSVRecord> records = readCSV(COMMENTS_HEADER, config.getCommentsCSV());

	for (CSVRecord record : records) {
		Comment comment = new Comment(Integer.parseInt(record.get(0)),
				Double.parseDouble(record.get(1).replaceAll(",", ".")),
				Double.parseDouble(record.get(2).replaceAll(",", ".")),
				Double.parseDouble(record.get(3).replaceAll(",", ".")), record.get(4), record.get(6), record.get(7));

		String filename = FilenameUtils.normalize(record.get(5), true);

		if (!filesMap.containsKey(filename)) {
			filesMap.put(filename, new ArrayList<Integer>());
		}

		commentsMap.put(comment.getId(), comment);
		filesMap.get(filename).add(comment.getId());
	}
}
 
Example #3
Source File: RouterLinkReader.java    From maestro-java with Apache License 2.0
/**
 * Reads router link data from a CSV file.
 * @param reader the reader providing the CSV data
 * @return the data read from the file
 * @throws IOException implementation specific
 */
@Override
protected RouterLinkDataSet readReader(Reader reader) throws IOException {
    Iterable<CSVRecord> records = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(';')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC)
            .parse(reader);

    for (CSVRecord record : records) {
        try {
            routerLinkProcessor.process(record.get(0), record.get(1), record.get(2), record.get(3), record.get(4),
                    record.get(5), record.get(6), record.get(7), record.get(8), record.get(9), record.get(10),
                    record.get(11), record.get(12), record.get(13));
        } catch (Throwable t) {
            logger.warn("Unable to parse record: {}", t.getMessage(), t);
        }
    }

    return routerLinkProcessor.getRouterLinkDataSet();
}
 
Example #4
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEmptyDateTimeFields() throws IOException {
  File tmpfile = temporaryFolder.newFile("testEmptyDateTimeFields.csv");
  createFile(tmpfile, UTF_8, testCSVSingleWithStructuredDataEmptyDateTime);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term,author");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Multimap<String, Object> multimap = csvFileManager.generateMultiMap(csvRecord);
  assertEquals(Collections.emptyList(), multimap.get("updated"));
}
 
Example #5
Source File: PrimaryTumorToDOIDMapper.java    From hmftools with GNU General Public License v3.0
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    final CSVParser parser = CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    Map<String, Set<String>> doidsPerPrimaryTumor = Maps.newHashMap();
    for (final CSVRecord record : parser) {
        final String primaryTumorLocation = record.get("primaryTumorLocation");
        final String doids = record.get("doids");

        doidsPerPrimaryTumor.put(primaryTumorLocation, toSet(doids));
    }

    return new PrimaryTumorToDOIDMapper(doidsPerPrimaryTumor);
}
 
Example #6
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEncodingMismatch() throws IOException {
  String utf8euro = "\u20ac";
  File tmpfile = temporaryFolder.newFile("testEncoding.csv");
  createFile(tmpfile, Charset.forName("Cp1252"),
      "term, definition",
      "euro, symbol=" + utf8euro);

  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(CSVFileManager.FILE_ENCODING, UTF_8.name()); // Read using a different encoding
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);
  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CSVRecord csvRecord = getOnlyElement(csvFileManager.getCSVFile());

  assertNotEquals("symbol=" + utf8euro, csvRecord.get("definition"));
  assertThat(csvRecord.get("definition"), endsWith(UTF_8.newDecoder().replacement()));
}
 
Example #7
Source File: CsvTableTransformer.java    From vividus with Apache License 2.0
@Override
public String transform(String tableAsString, TableParsers tableParsers, TableProperties properties)
{
    checkTableEmptiness(tableAsString);
    String csvPath = ExtendedTableTransformer.getMandatoryNonBlankProperty(properties, "csvPath");
    try
    {
        List<CSVRecord> result = csvReader.readCsvFile(findResource(getClass(), csvPath));
        return ExamplesTableProcessor.buildExamplesTable(result.get(0).toMap().keySet(), extractValues(result),
                properties, true);
    }
    catch (IOException e)
    {
        throw new UncheckedIOException("Problem during CSV file reading", e);
    }
}
 
Example #8
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEncodingUtf8() throws IOException {
  String utf8euro = "\u20ac";
  File tmpfile = temporaryFolder.newFile("testEncoding.csv");
  createFile(tmpfile, UTF_8,
      "term, definition",
      "euro, symbol=" + utf8euro);

  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(CSVFileManager.FILE_ENCODING, UTF_8.name());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);
  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CSVRecord csvRecord = getOnlyElement(csvFileManager.getCSVFile());

  assertEquals("symbol=" + utf8euro,  csvRecord.get("definition"));
}
 
Example #9
Source File: GroupConfigCsvParser.java    From RepoSense with MIT License
/**
 * Processes the CSV file line by line and adds the created {@code Group} into {@code results}.
 */
@Override
protected void processLine(List<GroupConfiguration> results, CSVRecord record) throws InvalidLocationException {
    String location = get(record, LOCATION_POSITION);
    String groupName = get(record, GROUP_NAME_POSITION);
    List<String> globList = getAsList(record, FILES_GLOB_POSITION);

    GroupConfiguration groupConfig = findMatchingGroupConfiguration(results, location);

    FileType group = new FileType(groupName, globList);
    if (groupConfig.containsGroup(group)) {
        logger.warning(String.format(
                "Skipping group as %s has already been specified for the repository %s",
                group.toString(), groupConfig.getLocation()));
        return;
    }

    groupConfig.addGroup(group);
}
 
Example #10
Source File: TestSpreadsheetExtractor.java    From tabula-java with MIT License
@Test
public void testFindSpreadsheetsFromCells() throws IOException {

    CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
            Charset.forName("utf-8"),
            CSVFormat.DEFAULT);

    List<Cell> cells = new ArrayList<>();

    for (CSVRecord record : parse) {
        cells.add(new Cell(Float.parseFloat(record.get(0)),
                Float.parseFloat(record.get(1)),
                Float.parseFloat(record.get(2)),
                Float.parseFloat(record.get(3))));
    }

    List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
    Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER);
    List<Rectangle> foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
    Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER);
    assertEquals(expected, foundRectangles);
}
 
Example #11
Source File: CSVParserToDataSetTransformer.java    From IridiumApplicationTesting with MIT License
public DatasetsRootElement transform(@NotNull final CSVParser csvRecords) {
	checkNotNull(csvRecords);

	Set<String> headings = csvRecords.getHeaderMap().keySet();

	DatasetsRootElement datasetsRootElement = new DatasetsRootElement();

	for (CSVRecord record : csvRecords) {
		DataSet dataSet = new DataSet();
		List<Setting> settings = dataSet.getSettings();

		for (String heading : headings) {
			Setting setting = new Setting();
			setting.setName(heading);
			setting.setValue(record.get(heading));

			settings.add(setting);
		}

		datasetsRootElement.getDataSets().getDataSets().add(dataSet);
	}

	return datasetsRootElement;
}
 
Example #12
Source File: UIPParser.java    From rival with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public DataModelIF<Long, Long> parseData(final File f) throws IOException {
    DataModelIF<Long, Long> dataset = new DataModel<>();
    // try-with-resources ensures the reader is closed even if parsing fails
    try (Reader in = new InputStreamReader(new FileInputStream(f), "UTF-8")) {
        Iterable<CSVRecord> records;
        if (isHasHeader()) {
            records = CSVFormat.EXCEL.withDelimiter(getDelimiter()).withHeader().parse(in);
        } else {
            records = CSVFormat.EXCEL.withDelimiter(getDelimiter()).parse(in);
        }
        for (CSVRecord record : records) {
            long userID = Long.parseLong(record.get(getUserTok()));
            long itemID = Long.parseLong(record.get(getItemTok()));
            double preference = Double.parseDouble(record.get(getPrefTok()));
            dataset.addPreference(userID, itemID, preference);
        }
    }
    return dataset;
}
 
Example #13
Source File: ElevateQueryComparer.java    From quaerite with Apache License 2.0
private static QuerySet loadQueries(Path file) throws Exception {
    QuerySet querySet = new QuerySet();
    Matcher uc = Pattern.compile("[A-Z]").matcher("");
    try (InputStream is = Files.newInputStream(file)) {
        try (Reader reader = new InputStreamReader(new BOMInputStream(is), "UTF-8")) {
            Iterable<CSVRecord> records = CSVFormat.EXCEL
                    .withFirstRecordAsHeader().parse(reader);
            for (CSVRecord record : records) {
                String q = record.get("query");
                Integer c = Integer.parseInt(record.get("count"));
                if (querySet.queries.containsKey(q)) {
                    LOG.warn("duplicate queries?! >" + q + "<");
                }

                querySet.set(q, c);
            }
        }
    }
    LOG.info("loaded " + querySet.queries.size() + " queries");
    return querySet;
}
 
Example #14
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerCreateItemWithMultiKey() throws IOException {
  File tmpfile = temporaryFolder.newFile("testCreateItemWithMultiKey.csv");
  createFile(tmpfile, UTF_8, testCSVSingle);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term, definition");
  config.put(CONTENT_TITLE, "term");
  config.put(CONTENT_HIGH, "term,definition");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Item item = csvFileManager.createItem(csvRecord);
  assertEquals("moma search||Google internal search", item.getName());
  assertEquals(null, item.getAcl());
  assertEquals("moma search", item.getMetadata().getSourceRepositoryUrl());
}
 
Example #15
Source File: BulkUserImportReader.java    From development with Apache License 2.0
@Override
public Iterator<Row> iterator() {
    return new Iterator<BulkUserImportReader.Row>() {
        Iterator<CSVRecord> i = csvParser.iterator();

        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        @Override
        public Row next() {
            CSVRecord record = i.next();
            return new Row(record);
        }

        @Override
        public void remove() {
            i.remove();
        }
    };
}
 
Example #16
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerSkipHeaderFalseCsvFileWithOnlyHeaderNoRecord() throws IOException {
  File tmpfile = temporaryFolder.newFile("SkipHeaderFalseCsvFileWithOnlyHeaderNoRecord.csv");
  createFile(tmpfile, UTF_8, testCSVSingleWithOnlyHeaderNoRecords);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CONTENT_HIGH, "term");
  config.put(CSVFileManager.SKIP_HEADER, "false");
  config.put(CSVFileManager.CSVCOLUMNS, "term,definition");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Item item = csvFileManager.createItem(csvRecord);
  assertEquals("term", item.getName());
  assertEquals(null, item.getAcl());
  assertEquals("term", item.getMetadata().getSourceRepositoryUrl());
}
 
Example #17
Source File: SerieDtoFromRecordBuilder.java    From CineLog with GNU General Public License v3.0
public SerieDto build(CSVRecord csvRecord) throws ImportException {
    try {
        return new SerieDto(
                formatLong(getId(csvRecord)),
                formatLong(csvRecord.get("movie_id")),
                formatLong(csvRecord.get("review_id")),
                csvRecord.get("title"),
                formatDate(csvRecord.get("review_date")),
                csvRecord.get("review"),
                formatFloat(csvRecord.get("rating")),
                getMaxRating(csvRecord),
                csvRecord.get("poster_path"),
                csvRecord.get("overview"),
                formatInteger(csvRecord.get("year")),
                csvRecord.get("release_date")
        );
    } catch (ParseException e) {
        throw new ImportException(context.getString(R.string.import_parsing_line_error_toast, csvRecord.get("title")), e);
    }
}
 
Example #18
Source File: WorldbankIndicatorApiImpl.java    From cia with Apache License 2.0
/**
 * Reads the CSV content and extracts the indicator codes.
 *
 * @param is
 *            the input stream to read from
 * @return the list of indicator codes
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private static List<String> readCsvContent(final InputStream is) throws IOException {
	final BufferedReader reader = new BufferedReader(new InputStreamReader(is,StandardCharsets.UTF_8));
	for (int i = 0; i < IGNORE_TOP_HEADERS_LINE; i++) {
		reader.readLine(); // skip the leading header lines
	}

	final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader().withDelimiter(','));
	final List<CSVRecord> records = parser.getRecords();
	records.remove(0);

	final List<String> list = new ArrayList<>();

	for (final CSVRecord csvRecord : records) {
		list.add(csvRecord.get("Indicator Code"));
	}

	return list;
}
 
Example #19
Source File: LegacyRateReader.java    From maestro-java with Apache License 2.0
@Override
protected RateData readReader(Reader reader) throws IOException {
    Iterable<CSVRecord> records = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(',')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC)
            .parse(reader);

    for (CSVRecord record : records) {
        try {
            processor.process(record.get(0), record.get(1));
        } catch (Exception e) {
            logger.warn("Unable to parse record: {}", e.getMessage(), e);
        }
    }

    return processor.getRateData();
}
 
Example #20
Source File: LicenseStoreData.java    From LicenseScout with Apache License 2.0
/**
 * Reads license name mappings from a CSV file.
 *
 * @param inputStream an input stream to read the file contents from
 * @param log the logger
 * @throws IOException if an I/O error occurs while reading the stream
 */
public void readNameMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat csvFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader br = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        final CSVParser csvParser = csvFormat.parse(br);
        for (final CSVRecord record : csvParser) {
            final String mappedName = record.get(0).trim();
            final int numLicenseIdentifiers = record.size() - 1;
            final List<License> licenses = new ArrayList<>();
            for (int i = 0; i < numLicenseIdentifiers; i++) {
                final String licenseIdentifier = record.get(i + 1).trim();
                final License license = getLicenseBySpdxIdentifier(licenseIdentifier);
                if (license != null) {
                    licenses.add(license);
                } else {
                    log.info("readNameMappings: license identifier not found: " + licenseIdentifier);
                }
            }
            nameMappings.put(mappedName, licenses);
        }
    }
}
 
Example #21
Source File: Hiscores.java    From rs-api with ISC License
/**
 * Gets a {@link Player} based on their display name.
 * @param displayName The player's display name.
 * @param table The table of {@link Hiscores}.
 * @return An {@link Optional} containing the {@link Player}, or {@link Optional#empty()} if no {@link Player} was found with that name.
 * @throws IOException If an I/O error occurs.
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Hiscores_Lite">Hiscores Lite</a>
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Ironman_Lite">Ironman Hiscores Lite</a>
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Hardcore_Ironman_Lite">Hardcore Ironman Hiscores Lite</a>
 */
public Optional<Player> playerInformation(String displayName, HiscoreTable table) throws IOException {
	Preconditions.checkNotNull(displayName);
	Preconditions.checkNotNull(table);

	String escapedName = NAME_SPACER.matcher(displayName).replaceAll("+");
	String url = String.format(PLAYER_INFORMATION_URL_FORMAT, table.getName(), escapedName);
	ImmutableList<CSVRecord> records = client.fromCSV(url);

	ImmutableList<String> skillNames = table.getSkillNames();
	ImmutableList<String> activityNames = table.getActivityNames();

	if (records.size() >= (skillNames.size() + activityNames.size())) {
		ImmutableMap<String, Skill> skills = readSkills(records, skillNames);
		ImmutableMap<String, HiscoreActivity> activities = readActivities(records, skillNames, activityNames);
		return Optional.of(new Player(skills, activities));
	} else {
		return Optional.empty();
	}
}
 
Example #22
Source File: DataConversionHelper.java    From sagemaker-sparkml-serving-container with Apache License 2.0
/**
 * Parses the input payload in CSV format to a list of Objects
 * @param csvInput, the input received from the request in CSV format
 * @param schema, the data schema retrieved from environment variable
 * @return List of Objects, where each Object corresponds to one feature of the input data
 * @throws IOException, if there is an exception thrown in the try-with-resources block
 */
public List<Object> convertCsvToObjectList(final String csvInput, final DataSchema schema) throws IOException {
    try (final StringReader sr = new StringReader(csvInput)) {
        final List<Object> valueList = Lists.newArrayList();
        final CSVParser parser = CSVFormat.DEFAULT.parse(sr);
        // We do not support multiple CSV lines as input currently
        final CSVRecord record = parser.getRecords().get(0);
        final int inputLength = schema.getInput().size();
        for (int idx = 0; idx < inputLength; ++idx) {
            ColumnSchema sc = schema.getInput().get(idx);
            // For CSV input, each value is treated as an individual feature by default
            valueList.add(this.convertInputDataToJavaType(sc.getType(), DataStructureType.BASIC, record.get(idx)));
        }
        return valueList;
    }
}
 
Example #23
Source File: Loc.java    From core with GNU General Public License v3.0
public static List<Loc> readLocs(String fileName) {
	List<Loc> locs = new ArrayList<Loc>();
	
	try {
		Reader in = new BufferedReader(new InputStreamReader(
				new FileInputStream(fileName), "UTF-8"));
		CSVFormat formatter = 
				CSVFormat.DEFAULT.withHeader().withCommentMarker('-');
		
		// Parse the file
		Iterable<CSVRecord> records = formatter.parse(in);
		Iterator<CSVRecord> iterator = records.iterator();
		while (iterator.hasNext()) {
			// Determine the record to process
			CSVRecord record = iterator.next();
			Loc loc = getLoc(record);
			if (loc.accuracy < MAX_ALLOWED_ACCURACY) {
				locs.add(loc);
			}
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
	
	return locs;
}
 
Example #24
Source File: CsvUpsertExecutor.java    From phoenix with Apache License 2.0
public CsvUpsertExecutor(Connection conn, String tableName,
        List<ColumnInfo> columnInfoList, UpsertListener<CSVRecord> upsertListener,
        String arrayElementSeparator) {
    super(conn, tableName, columnInfoList, upsertListener);
    this.arrayElementSeparator = arrayElementSeparator;
    finishInit();
}
 
Example #25
Source File: TestUtils.java    From samoa with Apache License 2.0
public static void assertResults(File outputFile, com.yahoo.labs.samoa.TestParams testParams) throws IOException {

        LOG.info("Checking results file " + outputFile.getAbsolutePath());
        // 1. parse result file with csv parser
        Reader in = new FileReader(outputFile);
        Iterable<CSVRecord> records = CSVFormat.EXCEL.withSkipHeaderRecord(false)
                .withIgnoreEmptyLines(true).withDelimiter(',').withCommentMarker('#').parse(in);
        CSVRecord last = null;
        Iterator<CSVRecord> iterator = records.iterator();
        CSVRecord header = iterator.next();
        Assert.assertEquals("Invalid number of columns", 5, header.size());

        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.EVALUATION_INSTANCES, header.get(0).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.CLASSIFIED_INSTANCES, header.get(1).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.CLASSIFICATIONS_CORRECT, header.get(2).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.KAPPA_STAT, header.get(3).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.KAPPA_TEMP_STAT, header.get(4).trim());

        // 2. check last line result
        while (iterator.hasNext()) {
            last = iterator.next();
        }
        Assert.assertNotNull("Result file contains no data rows", last);

        assertTrue(String.format("Unmet threshold expected %d got %f",
                testParams.getEvaluationInstances(), Float.parseFloat(last.get(0))),
                testParams.getEvaluationInstances() <= Float.parseFloat(last.get(0)));
        assertTrue(String.format("Unmet threshold expected %d got %f", testParams.getClassifiedInstances(),
                Float.parseFloat(last.get(1))),
                testParams.getClassifiedInstances() <= Float.parseFloat(last.get(1)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getClassificationsCorrect(), Float.parseFloat(last.get(2))),
                testParams.getClassificationsCorrect() <= Float.parseFloat(last.get(2)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getKappaStat(), Float.parseFloat(last.get(3))),
                testParams.getKappaStat() <= Float.parseFloat(last.get(3)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getKappaTempStat(), Float.parseFloat(last.get(4))),
                testParams.getKappaTempStat() <= Float.parseFloat(last.get(4)));

    }
 
Example #26
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  // convert from CSV row to DLP Table Row
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example #27
Source File: CsvReader.java    From jstarcraft-core with Apache License 2.0
public CsvReader(InputStream inputStream, CodecDefinition definition) {
    super(definition);
    InputStreamReader buffer = new InputStreamReader(inputStream, StringUtility.CHARSET);
    try (CSVParser input = new CSVParser(buffer, FORMAT)) {
        Iterator<CSVRecord> iterator = input.iterator();
        if (iterator.hasNext()) {
            CSVRecord values = iterator.next();
            this.inputStream = values.iterator();
        }
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
 
Example #28
Source File: HttpClient.java    From rs-api with ISC License
/**
 * Deserializes a CSV file from a specified URL into an {@link ImmutableList} of {@link CSVRecord}s.
 * @param url The URL to deserialize from.
 * @return An {@link ImmutableList} of {@link CSVRecord}s.
 * @throws IOException If an I/O error occurs.
 */
@Override
public ImmutableList<CSVRecord> fromCSV(String url) throws IOException {
	Preconditions.checkNotNull(url);

	try (CSVParser parser = CSVParser.parse(stringFrom(url), CSV_FORMAT)) {
		return ImmutableList.copyOf(parser.getRecords());
	}
}
 
Example #29
Source File: SimpleCsvFileLookupService.java    From nifi with Apache License 2.0
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final Map<String, String> properties = new HashMap<>();
            try (final InputStream is = new FileInputStream(csvFile)) {
                try (final InputStreamReader reader = new InputStreamReader(is, charset)) {
                    final Iterable<CSVRecord> records = csvFormat.withFirstRecordAsHeader().parse(reader);
                    for (final CSVRecord record : records) {
                        final String key = record.get(lookupKeyColumn);
                        final String value = record.get(lookupValueColumn);
                        if (StringUtils.isBlank(key)) {
                            throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                        } else if (!ignoreDuplicates && properties.containsKey(key)) {
                            throw new IllegalStateException("Duplicate lookup key encountered: " + key + " in " + csvFile);
                        } else if (ignoreDuplicates && properties.containsKey(key)) {
                            logger.warn("Duplicate lookup key encountered: {} in {}", new Object[]{key, csvFile});
                        }
                        properties.put(key, value);
                    }
                }
            }

            this.cache = new ConcurrentHashMap<>(properties);

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
 
Example #30
Source File: WideEcrfFileReader.java    From hmftools with GNU General Public License v3.0
@NotNull
private static List<CSVRecord> readCsvSkipHeader(@NotNull String pathToCsv, char delimiter) throws IOException {
    CSVFormat format = CSVFormat.DEFAULT.withDelimiter(delimiter);
    // try-with-resources ensures the parser and its underlying stream are closed
    try (CSVParser parser = format.parse(new BufferedReader(new InputStreamReader(new FileInputStream(pathToCsv))))) {
        List<CSVRecord> records = parser.getRecords();
        return records.subList(1, records.size());
    }
}