org.apache.commons.csv.CSVRecord Java Examples

The following examples show how to use org.apache.commons.csv.CSVRecord. They are taken from open source projects; the source file, project, and license are listed above each example so you can view the code in its original context.
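Before looking at the project examples below, here is a minimal, self-contained sketch (not taken from any of the projects listed) of the basic pattern most of them share: build a CSVFormat, parse a Reader into a CSVParser, and iterate the resulting CSVRecord objects, reading values either by header name or by zero-based index.

import java.io.Reader;
import java.io.StringReader;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordBasics {
  public static void main(String[] args) throws Exception {
    String csv = "name,age\nalice,30\nbob,25\n";
    // CSVParser implements Closeable and Iterable<CSVRecord>
    try (Reader in = new StringReader(csv);
         CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(in)) {
      for (CSVRecord record : parser) {
        String name = record.get("name");          // access by header name
        int age = Integer.parseInt(record.get(1)); // access by zero-based index
        System.out.println(name + " is " + age);
      }
    }
  }
}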
Example #1
Source File: SymSpellSearchBenchMark.java    From customized-symspell with MIT License
private List<String> readQueries(String queryFile) {
  List<String> queries = new ArrayList<>();
  try {
    URL queryResourceUrl = this.getClass().getClassLoader().getResource(queryFile);
    // try-with-resources closes the parser and its underlying stream
    try (CSVParser qparser = CSVParser
        .parse(queryResourceUrl, Charset.forName("UTF-8"),
            CSVFormat.DEFAULT.withDelimiter(' '))) {
      // CSVParser is Iterable, so records can be read with a for-each loop
      for (CSVRecord csvRecord : qparser) {
        queries.add(csvRecord.get(0));
      }
    }
  } catch (IOException ex) {
    System.err.println("Error occurred " + ex);
  }
  return queries;
}
 
Example #2
Source File: ExCommentCSVReader.java    From repositoryminer with Apache License 2.0
private void readComments() throws IOException {
	List<CSVRecord> records = readCSV(COMMENTS_HEADER, config.getCommentsCSV());

	for (CSVRecord record : records) {
		Comment comment = new Comment(Integer.parseInt(record.get(0)),
				Double.parseDouble(record.get(1).replaceAll(",", ".")),
				Double.parseDouble(record.get(2).replaceAll(",", ".")),
				Double.parseDouble(record.get(3).replaceAll(",", ".")), record.get(4), record.get(6), record.get(7));

		String filename = FilenameUtils.normalize(record.get(5), true);

		if (!filesMap.containsKey(filename)) {
			filesMap.put(filename, new ArrayList<Integer>());
		}

		commentsMap.put(comment.getId(), comment);
		filesMap.get(filename).add(comment.getId());
	}
}
 
Example #3
Source File: RouterLinkReader.java    From maestro-java with Apache License 2.0
/**
 * Reads router link data from a CSV file.
 * @param reader the reader providing the CSV data
 * @return the data read from the file
 * @throws IOException implementation specific
 */
@Override
protected RouterLinkDataSet readReader(Reader reader) throws IOException {
    Iterable<CSVRecord> records = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(';')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC)
            .parse(reader);

    for (CSVRecord record : records) {
        try {
            routerLinkProcessor.process(record.get(0), record.get(1), record.get(2), record.get(3), record.get(4),
                    record.get(5), record.get(6), record.get(7), record.get(8), record.get(9), record.get(10),
                    record.get(11), record.get(12), record.get(13));
        } catch (Throwable t) {
            logger.warn("Unable to parse record: {}", t.getMessage(), t);
        }
    }

    return routerLinkProcessor.getRouterLinkDataSet();
}
 
Example #4
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEmptyDateTimeFields() throws IOException {
  File tmpfile = temporaryFolder.newFile("testEmptyDateTimeFields.csv");
  createFile(tmpfile, UTF_8, testCSVSingleWithStructuredDataEmptyDateTime);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term,author");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Multimap<String, Object> multimap = csvFileManager.generateMultiMap(csvRecord);
  assertEquals(Collections.emptyList(), multimap.get("updated"));
}
 
Example #5
Source File: PrimaryTumorToDOIDMapper.java    From hmftools with GNU General Public License v3.0
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    final CSVParser parser = CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    Map<String, Set<String>> doidsPerPrimaryTumor = Maps.newHashMap();
    for (final CSVRecord record : parser) {
        final String primaryTumorLocation = record.get("primaryTumorLocation");
        final String doids = record.get("doids");

        doidsPerPrimaryTumor.put(primaryTumorLocation, toSet(doids));
    }

    return new PrimaryTumorToDOIDMapper(doidsPerPrimaryTumor);
}
 
Example #6
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEncodingMismatch() throws IOException {
  String utf8euro = "\u20ac";
  File tmpfile = temporaryFolder.newFile("testEncoding.csv");
  createFile(tmpfile, Charset.forName("Cp1252"),
      "term, definition",
      "euro, symbol=" + utf8euro);

  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(CSVFileManager.FILE_ENCODING, UTF_8.name()); // Read using a different encoding
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);
  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CSVRecord csvRecord = getOnlyElement(csvFileManager.getCSVFile());

  assertNotEquals("symbol=" + utf8euro, csvRecord.get("definition"));
  assertThat(csvRecord.get("definition"), endsWith(UTF_8.newDecoder().replacement()));
}
 
Example #7
Source File: CsvTableTransformer.java    From vividus with Apache License 2.0
@Override
public String transform(String tableAsString, TableParsers tableParsers, TableProperties properties)
{
    checkTableEmptiness(tableAsString);
    String csvPath = ExtendedTableTransformer.getMandatoryNonBlankProperty(properties, "csvPath");
    try
    {
        List<CSVRecord> result = csvReader.readCsvFile(findResource(getClass(), csvPath));
        return ExamplesTableProcessor.buildExamplesTable(result.get(0).toMap().keySet(), extractValues(result),
                properties, true);
    }
    catch (IOException e)
    {
        throw new UncheckedIOException("Problem during CSV file reading", e);
    }
}
 
Example #8
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerEncodingUtf8() throws IOException {
  String utf8euro = "\u20ac";
  File tmpfile = temporaryFolder.newFile("testEncoding.csv");
  createFile(tmpfile, UTF_8,
      "term, definition",
      "euro, symbol=" + utf8euro);

  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(CSVFileManager.FILE_ENCODING, UTF_8.name());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  setupConfig.initConfig(config);
  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CSVRecord csvRecord = getOnlyElement(csvFileManager.getCSVFile());

  assertEquals("symbol=" + utf8euro,  csvRecord.get("definition"));
}
 
Example #9
Source File: GroupConfigCsvParser.java    From RepoSense with MIT License
/**
 * Processes the CSV file line by line and adds the created {@code Group} into {@code results}.
 */
@Override
protected void processLine(List<GroupConfiguration> results, CSVRecord record) throws InvalidLocationException {
    String location = get(record, LOCATION_POSITION);
    String groupName = get(record, GROUP_NAME_POSITION);
    List<String> globList = getAsList(record, FILES_GLOB_POSITION);

    GroupConfiguration groupConfig = findMatchingGroupConfiguration(results, location);

    FileType group = new FileType(groupName, globList);
    if (groupConfig.containsGroup(group)) {
        logger.warning(String.format(
                "Skipping group as %s has already been specified for the repository %s",
                group.toString(), groupConfig.getLocation()));
        return;
    }

    groupConfig.addGroup(group);
}
 
Example #10
Source File: TestSpreadsheetExtractor.java    From tabula-java with MIT License
@Test
public void testFindSpreadsheetsFromCells() throws IOException {

    CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
            Charset.forName("utf-8"),
            CSVFormat.DEFAULT);

    List<Cell> cells = new ArrayList<>();

    for (CSVRecord record : parse) {
        cells.add(new Cell(Float.parseFloat(record.get(0)),
                Float.parseFloat(record.get(1)),
                Float.parseFloat(record.get(2)),
                Float.parseFloat(record.get(3))));
    }

    List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
    Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER);
    List<Rectangle> foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
    Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER);
    assertEquals(expected, foundRectangles);
}
 
Example #11
Source File: CSVParserToDataSetTransformer.java    From IridiumApplicationTesting with MIT License
public DatasetsRootElement transform(@NotNull final CSVParser csvRecords) {
	checkNotNull(csvRecords);

	Set<String> headings = csvRecords.getHeaderMap().keySet();

	DatasetsRootElement datasetsRootElement = new DatasetsRootElement();

	for (CSVRecord record : csvRecords) {
		DataSet dataSet = new DataSet();
		List<Setting> settings = dataSet.getSettings();

		for (String heading : headings) {
			Setting setting = new Setting();
			setting.setName(heading);
			setting.setValue(record.get(heading));

			settings.add(setting);
		}

		datasetsRootElement.getDataSets().getDataSets().add(dataSet);
	}

	return datasetsRootElement;
}
 
Example #12
Source File: UIPParser.java    From rival with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public DataModelIF<Long, Long> parseData(final File f) throws IOException {
    DataModelIF<Long, Long> dataset = new DataModel<>();
    // try-with-resources ensures the reader is closed even if parsing fails
    try (Reader in = new InputStreamReader(new FileInputStream(f), "UTF-8")) {
        Iterable<CSVRecord> records;
        if (isHasHeader()) {
            records = CSVFormat.EXCEL.withDelimiter(getDelimiter()).withHeader().parse(in);
        } else {
            records = CSVFormat.EXCEL.withDelimiter(getDelimiter()).parse(in);
        }
        for (CSVRecord record : records) {
            long userID = Long.parseLong(record.get(getUserTok()));
            long itemID = Long.parseLong(record.get(getItemTok()));
            double preference = Double.parseDouble(record.get(getPrefTok()));
            dataset.addPreference(userID, itemID, preference);
        }
    }
    return dataset;
}
 
Example #13
Source File: ElevateQueryComparer.java    From quaerite with Apache License 2.0
private static QuerySet loadQueries(Path file) throws Exception {
    QuerySet querySet = new QuerySet();
    Matcher uc = Pattern.compile("[A-Z]").matcher("");
    try (InputStream is = Files.newInputStream(file)) {
        try (Reader reader = new InputStreamReader(new BOMInputStream(is), "UTF-8")) {
            Iterable<CSVRecord> records = CSVFormat.EXCEL
                    .withFirstRecordAsHeader().parse(reader);
            for (CSVRecord record : records) {
                String q = record.get("query");
                Integer c = Integer.parseInt(record.get("count"));
                if (querySet.queries.containsKey(q)) {
                    LOG.warn("duplicate queries?! >" + q + "<");
                }

                querySet.set(q, c);
            }
        }
    }
    LOG.info("loaded " + querySet.queries.size() + " queries");
    return querySet;
}
 
Example #14
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerCreateItemWithMultiKey() throws IOException {
  File tmpfile = temporaryFolder.newFile("testCreateItemWithMultiKey.csv");
  createFile(tmpfile, UTF_8, testCSVSingle);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term, definition");
  config.put(CONTENT_TITLE, "term");
  config.put(CONTENT_HIGH, "term,definition");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Item item = csvFileManager.createItem(csvRecord);
  assertEquals("moma search||Google internal search", item.getName());
  assertEquals(null, item.getAcl());
  assertEquals("moma search", item.getMetadata().getSourceRepositoryUrl());
}
 
Example #15
Source File: BulkUserImportReader.java    From development with Apache License 2.0
@Override
public Iterator<Row> iterator() {
    return new Iterator<BulkUserImportReader.Row>() {
        Iterator<CSVRecord> i = csvParser.iterator();

        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        @Override
        public Row next() {
            CSVRecord record = i.next();
            return new Row(record);
        }

        @Override
        public void remove() {
            i.remove();
        }
    };
}
 
Example #16
Source File: CSVFileManagerTest.java    From connector-sdk with Apache License 2.0
@Test
public void testCsvFileManagerSkipHeaderFalseCsvFileWithOnlyHeaderNoRecord() throws IOException {
  File tmpfile = temporaryFolder.newFile("SkipHeaderFalseCsvFileWithOnlyHeaderNoRecord.csv");
  createFile(tmpfile, UTF_8, testCSVSingleWithOnlyHeaderNoRecords);
  Properties config = new Properties();
  config.put(CSVFileManager.FILEPATH, tmpfile.getAbsolutePath());
  config.put(UrlBuilder.CONFIG_COLUMNS, "term");
  config.put(CSVFileManager.UNIQUE_KEY_COLUMNS, "term");
  config.put(CONTENT_TITLE, "term");
  config.put(CONTENT_HIGH, "term");
  config.put(CSVFileManager.SKIP_HEADER, "false");
  config.put(CSVFileManager.CSVCOLUMNS, "term,definition");
  setupConfig.initConfig(config);

  CSVFileManager csvFileManager = CSVFileManager.fromConfiguration();
  CloseableIterable<CSVRecord> csvFile = csvFileManager.getCSVFile();
  CSVRecord csvRecord = getOnlyElement(csvFile);

  Item item = csvFileManager.createItem(csvRecord);
  assertEquals("term", item.getName());
  assertEquals(null, item.getAcl());
  assertEquals("term", item.getMetadata().getSourceRepositoryUrl());
}
 
Example #17
Source File: SerieDtoFromRecordBuilder.java    From CineLog with GNU General Public License v3.0
public SerieDto build(CSVRecord csvRecord) throws ImportException {
    try {
        return new SerieDto(
                formatLong(getId(csvRecord)),
                formatLong(csvRecord.get("movie_id")),
                formatLong(csvRecord.get("review_id")),
                csvRecord.get("title"),
                formatDate(csvRecord.get("review_date")),
                csvRecord.get("review"),
                formatFloat(csvRecord.get("rating")),
                getMaxRating(csvRecord),
                csvRecord.get("poster_path"),
                csvRecord.get("overview"),
                formatInteger(csvRecord.get("year")),
                csvRecord.get("release_date")
        );
    } catch (ParseException e) {
        throw new ImportException(context.getString(R.string.import_parsing_line_error_toast, csvRecord.get("title")), e);
    }
}
 
Example #18
Source File: WorldbankIndicatorApiImpl.java    From cia with Apache License 2.0
/**
 * Reads the CSV content and extracts the indicator codes.
 *
 * @param is
 *            the input stream to read from
 * @return the list of indicator codes
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private static List<String> readCsvContent(final InputStream is) throws IOException {
	final BufferedReader reader = new BufferedReader(new InputStreamReader(is,StandardCharsets.UTF_8));
	for (int i = 0; i < IGNORE_TOP_HEADERS_LINE; i++) {
		reader.readLine(); // skip the leading header lines
	}

	final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader().withDelimiter(','));
	final List<CSVRecord> records = parser.getRecords();
	records.remove(0);

	final List<String> list = new ArrayList<>();

	for (final CSVRecord csvRecord : records) {
		list.add(csvRecord.get("Indicator Code"));
	}

	return list;
}
 
Example #19
Source File: LegacyRateReader.java    From maestro-java with Apache License 2.0
@Override
protected RateData readReader(Reader reader) throws IOException {
    Iterable<CSVRecord> records = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(',')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC)
            .parse(reader);

    for (CSVRecord record : records) {
        try {
            processor.process(record.get(0), record.get(1));
        } catch (Exception e) {
            logger.warn("Unable to parse record: {}", e.getMessage(), e);
        }
    }

    return processor.getRateData();
}
 
Example #20
Source File: LicenseStoreData.java    From LicenseScout with Apache License 2.0
/**
 * Reads license name mappings from a CSV file.
 *
 * @param inputStream an input stream to read the file contents from
 * @param log the logger
 * @throws IOException if an I/O error occurs while reading the stream
 */
public void readNameMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat csvFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader br = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        final CSVParser csvParser = csvFormat.parse(br);
        for (final CSVRecord record : csvParser) {
            final String mappedName = record.get(0).trim();
            final int numLicenseIdentifiers = record.size() - 1;
            final List<License> licenses = new ArrayList<>();
            for (int i = 0; i < numLicenseIdentifiers; i++) {
                final String licenseIdentifier = record.get(i + 1).trim();
                final License license = getLicenseBySpdxIdentifier(licenseIdentifier);
                if (license != null) {
                    licenses.add(license);
                } else {
                    log.info("readNameMappings: license identifier not found: " + licenseIdentifier);
                }
            }
            nameMappings.put(mappedName, licenses);
        }
    }
}
 
Example #21
Source File: Hiscores.java    From rs-api with ISC License
/**
 * Gets a {@link Player} based on their display name.
 * @param displayName The player's display name.
 * @param table The table of {@link Hiscores}.
 * @return An {@link Optional} containing the {@link Player}, or {@link Optional#empty()} if no {@link Player} was found with that name.
 * @throws IOException If an I/O error occurs.
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Hiscores_Lite">Hiscores Lite</a>
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Ironman_Lite">Ironman Hiscores Lite</a>
 * @see <a href="https://runescape.wiki/w/Application_programming_interface#Hardcore_Ironman_Lite">Hardcore Ironman Hiscores Lite</a>
 */
public Optional<Player> playerInformation(String displayName, HiscoreTable table) throws IOException {
	Preconditions.checkNotNull(displayName);
	Preconditions.checkNotNull(table);

	String escapedName = NAME_SPACER.matcher(displayName).replaceAll("+");
	String url = String.format(PLAYER_INFORMATION_URL_FORMAT, table.getName(), escapedName);
	ImmutableList<CSVRecord> records = client.fromCSV(url);

	ImmutableList<String> skillNames = table.getSkillNames();
	ImmutableList<String> activityNames = table.getActivityNames();

	if (records.size() >= (skillNames.size() + activityNames.size())) {
		ImmutableMap<String, Skill> skills = readSkills(records, skillNames);
		ImmutableMap<String, HiscoreActivity> activities = readActivities(records, skillNames, activityNames);
		return Optional.of(new Player(skills, activities));
	} else {
		return Optional.empty();
	}
}
 
Example #22
Source File: DataConversionHelper.java    From sagemaker-sparkml-serving-container with Apache License 2.0
/**
 * Parses the input payload in CSV format to a list of Objects
 * @param csvInput, the input received from the request in CSV format
 * @param schema, the data schema retrieved from environment variable
 * @return List of Objects, where each Object corresponds to one feature of the input data
 * @throws IOException, if there is an exception thrown in the try-with-resources block
 */
public List<Object> convertCsvToObjectList(final String csvInput, final DataSchema schema) throws IOException {
    try (final StringReader sr = new StringReader(csvInput)) {
        final List<Object> valueList = Lists.newArrayList();
        final CSVParser parser = CSVFormat.DEFAULT.parse(sr);
        // We do not support multiple CSV lines as input currently
        final CSVRecord record = parser.getRecords().get(0);
        final int inputLength = schema.getInput().size();
        for (int idx = 0; idx < inputLength; ++idx) {
            ColumnSchema sc = schema.getInput().get(idx);
            // For CSV input, each value is treated as an individual feature by default
            valueList.add(this.convertInputDataToJavaType(sc.getType(), DataStructureType.BASIC, record.get(idx)));
        }
        return valueList;
    }
}
 
Example #23
Source File: Loc.java    From core with GNU General Public License v3.0
public static List<Loc> readLocs(String fileName) {
	List<Loc> locs = new ArrayList<Loc>();
	
	try {
		Reader in = new BufferedReader(new InputStreamReader(
				new FileInputStream(fileName), "UTF-8"));
		CSVFormat formatter = 
				CSVFormat.DEFAULT.withHeader().withCommentMarker('-');
		
		// Parse the file
		Iterable<CSVRecord> records = formatter.parse(in);
		Iterator<CSVRecord> iterator = records.iterator();
		while (iterator.hasNext()) {
			// Determine the record to process
			CSVRecord record = iterator.next();
			Loc loc = getLoc(record);
			if (loc.accuracy < MAX_ALLOWED_ACCURACY) {
				locs.add(loc);
			}
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
	
	return locs;
}
 
Example #24
Source File: CsvUpsertExecutor.java    From phoenix with Apache License 2.0
public CsvUpsertExecutor(Connection conn, String tableName,
        List<ColumnInfo> columnInfoList, UpsertListener<CSVRecord> upsertListener,
        String arrayElementSeparator) {
    super(conn, tableName, columnInfoList, upsertListener);
    this.arrayElementSeparator = arrayElementSeparator;
    finishInit();
}
 
Example #25
Source File: TestUtils.java    From samoa with Apache License 2.0
public static void assertResults(File outputFile, com.yahoo.labs.samoa.TestParams testParams) throws IOException {

        LOG.info("Checking results file " + outputFile.getAbsolutePath());
        // 1. parse result file with csv parser
        Reader in = new FileReader(outputFile);
        Iterable<CSVRecord> records = CSVFormat.EXCEL.withSkipHeaderRecord(false)
                .withIgnoreEmptyLines(true).withDelimiter(',').withCommentMarker('#').parse(in);
        CSVRecord last = null;
        Iterator<CSVRecord> iterator = records.iterator();
        CSVRecord header = iterator.next();
        Assert.assertEquals("Invalid number of columns", 5, header.size());

        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.EVALUATION_INSTANCES, header.get(0).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.CLASSIFIED_INSTANCES, header.get(1).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.CLASSIFICATIONS_CORRECT, header.get(2).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.KAPPA_STAT, header.get(3).trim());
        Assert.assertEquals("Unexpected column", com.yahoo.labs.samoa.TestParams.KAPPA_TEMP_STAT, header.get(4).trim());

        // 2. check last line result
        while (iterator.hasNext()) {
            last = iterator.next();
        }
        Assert.assertNotNull("Result file contains no data rows", last);

        assertTrue(String.format("Unmet threshold expected %d got %f",
                testParams.getEvaluationInstances(), Float.parseFloat(last.get(0))),
                testParams.getEvaluationInstances() <= Float.parseFloat(last.get(0)));
        assertTrue(String.format("Unmet threshold expected %d got %f", testParams.getClassifiedInstances(),
                Float.parseFloat(last.get(1))),
                testParams.getClassifiedInstances() <= Float.parseFloat(last.get(1)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getClassificationsCorrect(), Float.parseFloat(last.get(2))),
                testParams.getClassificationsCorrect() <= Float.parseFloat(last.get(2)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getKappaStat(), Float.parseFloat(last.get(3))),
                testParams.getKappaStat() <= Float.parseFloat(last.get(3)));
        assertTrue(String.format("Unmet threshold expected %f got %f",
                testParams.getKappaTempStat(), Float.parseFloat(last.get(4))),
                testParams.getKappaTempStat() <= Float.parseFloat(last.get(4)));

    }
 
Example #26
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  // convert from CSV row to DLP Table Row
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example #27
Source File: CsvReader.java    From jstarcraft-core with Apache License 2.0
public CsvReader(InputStream inputStream, CodecDefinition definition) {
    super(definition);
    InputStreamReader buffer = new InputStreamReader(inputStream, StringUtility.CHARSET);
    try (CSVParser input = new CSVParser(buffer, FORMAT)) {
        Iterator<CSVRecord> iterator = input.iterator();
        if (iterator.hasNext()) {
            CSVRecord values = iterator.next();
            this.inputStream = values.iterator();
        }
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
 
Example #28
Source File: HttpClient.java    From rs-api with ISC License
/**
 * Deserializes a CSV file from a specified URL into an {@link ImmutableList} of {@link CSVRecord}s.
 * @param url The URL to deserialize from.
 * @return An {@link ImmutableList} of {@link CSVRecord}s.
 * @throws IOException If an I/O error occurs.
 */
@Override
public ImmutableList<CSVRecord> fromCSV(String url) throws IOException {
	Preconditions.checkNotNull(url);

	try (CSVParser parser = CSVParser.parse(stringFrom(url), CSV_FORMAT)) {
		return ImmutableList.copyOf(parser.getRecords());
	}
}
 
Example #29
Source File: SimpleCsvFileLookupService.java    From nifi with Apache License 2.0
private void loadCache() throws IllegalStateException, IOException {
    if (lock.tryLock()) {
        try {
            final ComponentLog logger = getLogger();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading lookup table from file: " + csvFile);
            }

            final Map<String, String> properties = new HashMap<>();
            try (final InputStream is = new FileInputStream(csvFile)) {
                try (final InputStreamReader reader = new InputStreamReader(is, charset)) {
                    final Iterable<CSVRecord> records = csvFormat.withFirstRecordAsHeader().parse(reader);
                    for (final CSVRecord record : records) {
                        final String key = record.get(lookupKeyColumn);
                        final String value = record.get(lookupValueColumn);
                        if (StringUtils.isBlank(key)) {
                            throw new IllegalStateException("Empty lookup key encountered in: " + csvFile);
                        } else if (!ignoreDuplicates && properties.containsKey(key)) {
                            throw new IllegalStateException("Duplicate lookup key encountered: " + key + " in " + csvFile);
                        } else if (ignoreDuplicates && properties.containsKey(key)) {
                            logger.warn("Duplicate lookup key encountered: {} in {}", new Object[]{key, csvFile});
                        }
                        properties.put(key, value);
                    }
                }
            }

            this.cache = new ConcurrentHashMap<>(properties);

            if (cache.isEmpty()) {
                logger.warn("Lookup table is empty after reading file: " + csvFile);
            }
        } finally {
            lock.unlock();
        }
    }
}
 
Example #30
Source File: WideEcrfFileReader.java    From hmftools with GNU General Public License v3.0
@NotNull
private static List<CSVRecord> readCsvSkipHeader(@NotNull String pathToCsv, char delimiter) throws IOException {
    CSVFormat format = CSVFormat.DEFAULT.withDelimiter(delimiter);
    // try-with-resources ensures the parser and its underlying stream are closed
    try (CSVParser parser = format.parse(new BufferedReader(new InputStreamReader(new FileInputStream(pathToCsv))))) {
        List<CSVRecord> records = parser.getRecords();
        return records.subList(1, records.size());
    }
}