Java Code Examples for org.apache.commons.csv.CSVFormat

The following examples show how to use org.apache.commons.csv.CSVFormat. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: webtau   Author: testingisdocumenting   File: CsvUtils.java    License: Apache License 2.0 8 votes vote down vote up
/**
 * Opens a {@link CSVParser} over the given CSV text.
 * <p>
 * The format starts from {@link CSVFormat#RFC4180} and additionally ignores surrounding
 * spaces and empty lines, trims values and uses {@code ','} as the delimiter.
 *
 * @param header  column names supplied by the caller; when this list is empty the first
 *                record of {@code content} is treated as the header
 * @param content CSV text to parse
 * @return a parser reading from {@code content}
 * @throws RuntimeException wrapping the {@link IOException} if the parser cannot be created
 */
private static CSVParser readCsvRecords(List<String> header, String content) {
    final CSVFormat baseFormat = header.isEmpty()
            ? CSVFormat.RFC4180.withFirstRecordAsHeader()
            : CSVFormat.RFC4180;
    final CSVFormat parseFormat = baseFormat
            .withIgnoreSurroundingSpaces()
            .withIgnoreEmptyLines()
            .withTrim()
            .withDelimiter(',');

    try {
        return parseFormat.parse(new StringReader(content));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example #2
Source Project: constellation   Author: constellation-app   File: CSVImportFileParser.java    License: Apache License 2.0 7 votes vote down vote up
/**
 * Returns the leading records of a CSV input as string arrays, for preview purposes.
 * <p>
 * Records are pulled from the parser one at a time and reading stops as soon as
 * {@code limit} rows have been collected, so the remainder of the input is never parsed.
 * As before, at least one record is returned when the input is non-empty, even if
 * {@code limit} is zero or negative, because the limit is checked after a row is added.
 *
 * @param input      source providing the CSV byte stream, decoded as UTF-8
 * @param parameters plugin parameters (not used by this implementation)
 * @param limit      maximum number of rows to return
 * @return the collected rows, header row included
 * @throws IOException if the input cannot be opened or read
 */
@Override
public List<String[]> preview(final InputSource input, final PluginParameters parameters, final int limit) throws IOException {
    // Leave the header on, as the importer expects this as the first entry.
    final ArrayList<String[]> results = new ArrayList<>();
    try (final CSVParser csvFileParser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8))) {
        // Iterate the parser directly instead of getRecords(): a preview must not
        // load the whole file into memory just to return the first few rows.
        for (final CSVRecord record : csvFileParser) {
            final String[] line = new String[record.size()];
            for (int i = 0; i < line.length; i++) {
                line[i] = record.get(i);
            }
            results.add(line);
            if (results.size() >= limit) {
                break;
            }
        }
    } catch (final RuntimeException e) {
        // The parser's iterator reports read failures as unchecked exceptions that wrap
        // the IOException; unwrap so callers keep seeing the declared checked exception.
        if (e.getCause() instanceof IOException) {
            throw (IOException) e.getCause();
        }
        throw e;
    }
    return results;
}
 
Example #3
Source Project: customized-symspell   Author: MighTguY   File: SymSpellSearchBenchMark.java    License: MIT License 7 votes vote down vote up
/**
 * Loads the first column of every record of a space-delimited classpath resource.
 * <p>
 * The parser is opened in a try-with-resources block so the underlying stream is
 * released even when reading fails. An {@link IOException} is reported on
 * {@code System.err} and whatever was collected up to that point is returned.
 *
 * @param queryFile classpath resource name, resolved through this class's class loader
 * @return the first value of each record, in file order
 */
private List<String> readQueries(String queryFile) {
  List<String> queries = new ArrayList<>();
  URL queryResourceUrl = this.getClass().getClassLoader().getResource(queryFile);
  try (CSVParser qparser = CSVParser
      .parse(queryResourceUrl, Charset.forName("UTF-8"),
          CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : qparser) {
      queries.add(csvRecord.get(0));
    }
  } catch (IOException ex) {
    System.err.println("Error occured " + ex);
  }
  return queries;
}
 
Example #4
Source Project: logging-log4j2   Author: apache   File: CsvLogEventLayout.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Plugin factory for {@link CsvLogEventLayout}.
 * <p>
 * The CSV-related attributes are handed to {@code createFormat} to obtain the
 * {@link CSVFormat}; the remaining attributes are passed straight to the layout constructor.
 *
 * @param config          configuration passed to the layout
 * @param format          format name handed to {@code createFormat}
 * @param delimiter       delimiter character handed to {@code createFormat}
 * @param escape          escape character handed to {@code createFormat}
 * @param quote           quote character handed to {@code createFormat}
 * @param quoteMode       quote mode handed to {@code createFormat}
 * @param nullString      null string handed to {@code createFormat}
 * @param recordSeparator record separator handed to {@code createFormat}
 * @param charset         charset passed to the layout
 * @param header          header passed to the layout
 * @param footer          footer passed to the layout
 * @return the new layout
 */
@PluginFactory
public static CsvLogEventLayout createLayout(
        // @formatter:off
        @PluginConfiguration final Configuration config,
        @PluginAttribute(defaultString = DEFAULT_FORMAT) final String format,
        @PluginAttribute final Character delimiter,
        @PluginAttribute final Character escape,
        @PluginAttribute final Character quote,
        @PluginAttribute final QuoteMode quoteMode,
        @PluginAttribute final String nullString,
        @PluginAttribute final String recordSeparator,
        @PluginAttribute(defaultString = DEFAULT_CHARSET) final Charset charset,
        @PluginAttribute final String header,
        @PluginAttribute final String footer)
        // @formatter:on
{
    return new CsvLogEventLayout(
            config,
            charset,
            createFormat(format, delimiter, escape, quote, quoteMode, nullString, recordSeparator),
            header,
            footer);
}
 
Example #5
Source Project: nifi   Author: apache   File: CSVUtilsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Every expression-language attribute resolves to the string "invalid"; the test asserts
 * that the resulting format uses ',' as delimiter, '"' as quote, '\' as escape and has
 * no comment marker.
 */
@Test
public void testCustomFormatWithELInvalidValues() {
    final PropertyContext propertyContext =
            createContext("${csv.delimiter}", "${csv.quote}", "${csv.escape}", "${csv.comment}");

    final Map<String, String> invalidAttributes = new HashMap<>();
    for (final String attributeName : new String[] {"csv.delimiter", "csv.quote", "csv.escape", "csv.comment"}) {
        invalidAttributes.put(attributeName, "invalid");
    }

    final CSVFormat actualFormat = CSVUtils.createCSVFormat(propertyContext, invalidAttributes);

    assertEquals(',', actualFormat.getDelimiter());
    assertEquals('"', (char) actualFormat.getQuoteCharacter());
    assertEquals('\\', (char) actualFormat.getEscapeCharacter());
    assertNull(actualFormat.getCommentMarker());
}
 
Example #6
Source Project: pipeline-utility-steps-plugin   Author: jenkinsci   File: ReadCSVStep.java    License: MIT License 6 votes vote down vote up
/**
 * Permits only the static methods {@code newFormat} and {@code valueOf} declared on
 * {@link CSVFormat}, and only when the declaring class lives in the package named by
 * {@code ORG_APACHE_COMMONS_CSV}.
 *
 * @param method the static method being checked
 * @param args   call arguments (not inspected)
 * @return {@code true} if the call is permitted, {@code false} otherwise
 */
@Override
public boolean permitsStaticMethod(@Nonnull Method method, @Nonnull Object[] args) {
    final Class<?> declaringClass = method.getDeclaringClass();
    final Package declaringPackage = declaringClass.getPackage();

    final boolean inCsvPackage = declaringPackage != null
            && declaringPackage.getName().equals(ORG_APACHE_COMMONS_CSV);
    if (!inCsvPackage || declaringClass != CSVFormat.class) {
        return false;
    }

    final String methodName = method.getName();
    return methodName.equals("newFormat") || methodName.equals("valueOf");
}
 
Example #7
Source Project: maestro-java   Author: maestro-performance   File: GeneralInfoReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads CSV records from the given reader and hands the first ten values of each record
 * to the general-info processor.
 * <p>
 * A record that cannot be processed is logged as a warning and skipped; the remaining
 * records are still handled.
 *
 * @param reader source of the CSV data
 * @return the data set exposed by the processor once all records were handed over
 * @throws IOException if the parser cannot be created from the reader
 */
@Override
protected GeneralInfoDataSet readReader(Reader reader) throws IOException {
    final CSVFormat format = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(';')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC);

    for (CSVRecord row : format.parse(reader)) {
        try {
            generalInfoProcessor.process(
                    row.get(0), row.get(1), row.get(2), row.get(3), row.get(4),
                    row.get(5), row.get(6), row.get(7), row.get(8), row.get(9));
        } catch (Throwable t) {
            // Best effort: one bad record must not abort the whole read.
            logger.warn("Unable to parse record: {}", t.getMessage(), t);
        }
    }

    return generalInfoProcessor.getGeneralInfoDataSet();
}
 
Example #8
Source Project: ksql-fork-with-deep-learning-function   Author: kaiwaehner   File: KsqlDelimitedSerializer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the columns of a row as a single CSV line encoded in UTF-8.
 *
 * @param topic      topic name (not used by this implementation)
 * @param genericRow row to serialize; {@code null} yields {@code null}
 * @return the encoded CSV line without its last two characters, or {@code null}
 * @throws SerializationException if printing the record fails for any reason
 */
@Override
public byte[] serialize(final String topic, final GenericRow genericRow) {
  if (genericRow == null) {
    return null;
  }
  try {
    final StringWriter buffer = new StringWriter();
    new CSVPrinter(buffer, CSVFormat.DEFAULT).printRecord(genericRow.getColumns());
    final String printedLine = buffer.toString();
    // printRecord terminates the line with the format's record separator; the last
    // two characters (CRLF for CSVFormat.DEFAULT) are cut off before encoding.
    final int payloadLength = printedLine.length() - 2;
    return printedLine.substring(0, payloadLength).getBytes(StandardCharsets.UTF_8);
  } catch (Exception e) {
    throw new SerializationException("Error serializing CSV message", e);
  }
}
 
Example #9
Source Project: cloud-search-samples   Author: gsuitedevs   File: DictionarySample.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a CSV dictionary file and converts every non-comment row into a
 * {@code DictionaryEntry}.
 * <p>
 * A row whose first value starts with {@code #} is treated as a comment and skipped.
 *
 * @param dictionaryFilePath path to the CSV file containing the dictionary
 * @return one entry per remaining row, in file order
 * @throws IOException if the file cannot be opened or the parser cannot be created
 */
List<DictionaryEntry> loadEntries(String dictionaryFilePath) throws IOException {
  try (BufferedReader br = new BufferedReader(new FileReader(dictionaryFilePath))) {
    CSVParser parser = new CSVParser(br, CSVFormat.DEFAULT);
    return StreamSupport.stream(parser.spliterator(), false)
        .filter(row -> !row.get(0).startsWith("#"))
        .map(row -> toDictionaryEntry(row))
        .collect(Collectors.toList());
  }
}

/** Builds an entry from one row: the first value is the term, all following values are its synonyms. */
private static DictionaryEntry toDictionaryEntry(CSVRecord row) {
  List<String> synonyms = StreamSupport.stream(row.spliterator(), false)
      .skip(1)
      .collect(Collectors.toList());
  return new DictionaryEntry(row.get(0), synonyms);
}
 
Example #10
Source Project: maestro-java   Author: maestro-performance   File: QueueReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads CSV records from the given reader and hands the first seven values of each
 * record to the queue processor.
 * <p>
 * A record that cannot be processed is logged as a warning and skipped.
 *
 * @param reader source of the CSV data
 * @return the data set exposed by the processor once all records were handed over
 * @throws IOException if the parser cannot be created from the reader
 */
@Override
protected QueueDataSet readReader(Reader reader) throws IOException {
    final CSVFormat format = CSVFormat.RFC4180
            .withCommentMarker('#')
            .withFirstRecordAsHeader()
            .withRecordSeparator(';')
            .withQuote('"')
            .withQuoteMode(QuoteMode.NON_NUMERIC);

    for (CSVRecord row : format.parse(reader)) {
        try {
            queueProcessor.process(
                    row.get(0), row.get(1), row.get(2), row.get(3),
                    row.get(4), row.get(5), row.get(6));
        } catch (Throwable t) {
            // Best effort: one bad record must not abort the whole read.
            logger.warn("Unable to parse record: {}", t.getMessage(), t);
        }
    }

    return queueProcessor.getQueueDataSet();
}
 
Example #11
Source Project: rival   Author: recommenders   File: UIPParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * The file is read as UTF-8 using {@link CSVFormat#EXCEL} with the configured delimiter;
 * when {@code isHasHeader()} is true the first record is consumed as the header. For
 * each record the user, item and preference columns are parsed and added to the data model.
 * The reader is managed by try-with-resources so the file handle is released even when
 * a record is malformed or a number cannot be parsed.
 *
 * @throws IOException if the file cannot be opened or read
 */
@Override
public DataModelIF<Long, Long> parseData(final File f) throws IOException {
    DataModelIF<Long, Long> dataset = new DataModel<>();

    CSVFormat format = CSVFormat.EXCEL.withDelimiter(getDelimiter());
    if (isHasHeader()) {
        format = format.withHeader();
    }

    try (Reader in = new InputStreamReader(new FileInputStream(f), "UTF-8")) {
        for (CSVRecord record : format.parse(in)) {
            long userID = Long.parseLong(record.get(getUserTok()));
            long itemID = Long.parseLong(record.get(getItemTok()));
            double preference = Double.parseDouble(record.get(getPrefTok()));
            dataset.addPreference(userID, itemID, preference);
        }
    }
    return dataset;
}
 
Example #12
Source Project: oryx   Author: OryxProject   File: TextUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the first record of a delimited string into its values.
 *
 * @param delimited text to parse
 * @param format    CSV format describing the delimiter and quoting rules
 * @return the values of the first record, or {@code EMPTY_STRING} when there is no record
 * @throws IllegalStateException wrapping an {@link IOException} from the parser
 */
private static String[] doParseDelimited(String delimited, CSVFormat format) {
  try (CSVParser parser = CSVParser.parse(delimited, format)) {
    Iterator<CSVRecord> recordIterator = parser.iterator();
    if (!recordIterator.hasNext()) {
      return EMPTY_STRING;
    }
    CSVRecord firstRecord = recordIterator.next();
    return StreamSupport.stream(firstRecord.spliterator(), false).toArray(String[]::new);
  } catch (IOException e) {
    throw new IllegalStateException(e); // Can't happen
  }
}
 
Example #13
Source Project: datacollector   Author: streamsets   File: TestDelimitedCharDataParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses two records separated by CRLF with no header handling and list-typed records,
 * checking the values, the absence of header fields and the reported offsets
 * ("0", "5", "8", then "-1" once the input is exhausted).
 */
@Test
public void testClRfEndOfLines() throws Exception {
  OverrunReader input = new OverrunReader(new StringReader("A,B\r\na,b"), 1000, true, false);
  DelimitedDataParserSettings parserSettings = DelimitedDataParserSettings.builder()
      .withSkipStartLines(0)
      .withFormat(CSVFormat.DEFAULT)
      .withHeader(CsvHeader.NO_HEADER)
      .withMaxObjectLen(-1)
      .withRecordType(CsvRecordType.LIST)
      .withParseNull(false)
      .withNullConstant(null)
      .withAllowExtraColumns(false)
      .build();
  DataParser dataParser = new DelimitedCharDataParser(getContext(), "id", input, 0, parserSettings);

  Assert.assertEquals("0", dataParser.getOffset());

  // First line: "A,B"
  Record firstRecord = dataParser.parse();
  Assert.assertNotNull(firstRecord);
  Assert.assertEquals("id::0", firstRecord.getHeader().getSourceId());
  Assert.assertEquals("A", firstRecord.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(firstRecord.has("[0]/header"));
  Assert.assertEquals("B", firstRecord.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(firstRecord.has("[1]/header"));
  Assert.assertEquals("5", dataParser.getOffset());

  // Second line: "a,b"
  Record secondRecord = dataParser.parse();
  Assert.assertNotNull(secondRecord);
  Assert.assertEquals("id::5", secondRecord.getHeader().getSourceId());
  Assert.assertEquals("a", secondRecord.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(secondRecord.has("[0]/header"));
  Assert.assertEquals("b", secondRecord.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(secondRecord.has("[1]/header"));
  Assert.assertEquals("8", dataParser.getOffset());

  // End of input
  Assert.assertNull(dataParser.parse());
  Assert.assertEquals("-1", dataParser.getOffset());
  dataParser.close();
}
 
Example #14
Source Project: data-polygamy   Author: VIDA-NYU   File: FrameworkUtils.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Splits a single CSV line into its values using {@link CSVFormat#DEFAULT}.
 * <p>
 * Only the first record of {@code val} is returned. The parser is closed via
 * try-with-resources, and input that contains no record (for example an empty string)
 * yields an empty array instead of an {@link IndexOutOfBoundsException}.
 *
 * @param val CSV text to split
 * @return the values of the first record, or an empty array when there is none
 * @throws IOException if parsing fails
 */
public static String[] splitStr(String val) throws IOException {
    try (CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT)) {
        java.util.List<CSVRecord> records = parser.getRecords();
        if (records.isEmpty()) {
            return new String[0];
        }
        CSVRecord record = records.get(0);
        String[] input = new String[record.size()];
        for (int i = 0; i < input.length; i++) {
            input[i] = record.get(i);
        }
        return input;
    }
}
 
Example #15
Source Project: CodeDefenders   Author: CodeDefenders   File: AdminAnalyticsKillMapsApi.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Writes the killmap analytics data to the response as CSV, with a header row.
 * <p>
 * Each column name doubles as the bean property that is read from every data object.
 *
 * @param response servlet response the CSV is written to
 * @throws IOException if writing to the response fails
 * @throws RuntimeException wrapping any reflection failure while reading a property
 */
private void doGetCSV(HttpServletResponse response) throws IOException {
    response.setContentType("text/csv");

    final List<KillmapDataDTO> rows = AnalyticsDAO.getAnalyticsKillMapData();
    final String[] header = {
        "userId",
        "userName",
        "classId",
        "className",
        "role",
        "usefulMutants",
        "usefulTests"
    };

    final CSVPrinter printer = new CSVPrinter(response.getWriter(), CSVFormat.DEFAULT.withHeader(header));
    try {
        for (KillmapDataDTO row : rows) {
            for (String property : header) {
                printer.print(PropertyUtils.getProperty(row, property));
            }
            printer.println();
        }
    } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
        throw new RuntimeException(e);
    }

    printer.flush();
}
 
Example #16
Source Project: constellation   Author: constellation-app   File: HashmodCSVImportFileParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the whole CSV input (RFC 4180, decoded as UTF-8) into a list of string arrays,
 * one array per record.
 *
 * @param input      source providing the CSV byte stream
 * @param parameters plugin parameters (not used by this implementation)
 * @return all records in input order
 * @throws IOException if the input cannot be opened or parsed
 */
public List<String[]> parse(final HashmodInputSource input, final PluginParameters parameters) throws IOException {
    final ArrayList<String[]> rows = new ArrayList<>();
    try (final CSVParser csvFileParser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8.name()))) {
        for (final CSVRecord record : csvFileParser.getRecords()) {
            final String[] cells = new String[record.size()];
            int column = 0;
            for (final String value : record) {
                cells[column++] = value;
            }
            rows.add(cells);
        }
    }
    return rows;
}
 
Example #17
Source Project: nifi   Author: apache   File: SimpleCsvFileLookupService.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the service settings from the configuration context, creates the file watcher
 * for the CSV file and loads the cache.
 *
 * @param context configuration context supplying the property values
 * @throws InitializationException if loading the cache fails with an {@link IllegalStateException}
 * @throws IOException declared by the signature; presumably raised by {@code loadCache()}
 */
@OnEnabled
public void onEnabled(final ConfigurationContext context) throws InitializationException, IOException, FileNotFoundException {
    this.csvFile = context.getProperty(CSV_FILE).evaluateAttributeExpressions().getValue();

    final String predefinedFormatName = context.getProperty(CSV_FORMAT).getValue();
    this.csvFormat = CSVFormat.Predefined.valueOf(predefinedFormatName).getFormat();

    this.charset = context.getProperty(CHARSET).evaluateAttributeExpressions().getValue();
    this.lookupKeyColumn = context.getProperty(LOOKUP_KEY_COLUMN).evaluateAttributeExpressions().getValue();
    this.lookupValueColumn = context.getProperty(LOOKUP_VALUE_COLUMN).evaluateAttributeExpressions().getValue();
    this.ignoreDuplicates = context.getProperty(IGNORE_DUPLICATES).asBoolean();

    this.watcher = new SynchronousFileWatcher(
            Paths.get(csvFile),
            new LastModifiedMonitor(),
            30000L);

    try {
        loadCache();
    } catch (final IllegalStateException e) {
        // Report a cache-loading failure as an initialization failure, keeping the cause.
        throw new InitializationException(e.getMessage(), e);
    }
}
 
Example #18
Source Project: WhiteRabbit   Author: OHDSI   File: ETLSummaryGenerator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given rows to a CSV file in RFC 4180 format.
 * <p>
 * A {@code .csv} extension is appended when the file name does not already end with it
 * (case-insensitive check). The writer is closed in a {@code finally} block so the file
 * is released even if writing a row fails.
 *
 * @param filename target file name, with or without the {@code .csv} extension
 * @param rows     rows to write, in order
 */
static void writeCsv(String filename, List<Row> rows) {
    if (!filename.toLowerCase().endsWith(".csv")) {
        filename = filename + ".csv";
    }

    WriteCSVFileWithHeader out = new WriteCSVFileWithHeader(filename, CSVFormat.RFC4180);
    try {
        for (Row row : rows) {
            out.write(row);
        }
    } finally {
        out.close();
    }
}
 
Example #19
Source Project: jstarcraft-example   Author: HongZhaoHua   File: MovieDataConfigurer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the movie data module.
 * <p>
 * Creates a dense module named "score" with the fields user, item, score and instant,
 * fills it from the tab-delimited file {@code data/ml-100k/u.data}, then for every
 * instance copies its "score" quantity feature into the quantity mark and registers a
 * click on the item for the corresponding user.
 *
 * @param movieDataSpace data space used to create the module and to supply the quality
 *                       and quantity attributes for the converter
 * @param movieUsers     users, indexed by the "user" quality feature of each instance
 * @param movieItems     items (not read by this method)
 * @return the populated data module
 * @throws Exception if reading or converting the data file fails
 */
@Bean("movieDataModule")
DataModule getMovieDataModule(DataSpace movieDataSpace, List<MovieUser> movieUsers, List<MovieItem> movieItems) throws Exception {
    TreeMap<Integer, String> fieldsByIndex = new TreeMap<>();
    fieldsByIndex.put(1, "user");
    fieldsByIndex.put(2, "item");
    fieldsByIndex.put(3, "score");
    fieldsByIndex.put(4, "instant");
    DataModule module = movieDataSpace.makeDenseModule("score", fieldsByIndex, 1000000);

    File dataFile = new File("data/ml-100k/u.data");
    DataConverter<InputStream> csvConverter = new CsvConverter(
            CSVFormat.DEFAULT.withDelimiter('\t'),
            movieDataSpace.getQualityAttributes(),
            movieDataSpace.getQuantityAttributes());
    try (InputStream dataStream = new FileInputStream(dataFile)) {
        csvConverter.convert(module, dataStream);
    }

    int userDimension = module.getQualityInner("user");
    int itemDimension = module.getQualityInner("item");
    int scoreDimension = module.getQuantityInner("score");
    for (DataInstance instance : module) {
        int userIndex = instance.getQualityFeature(userDimension);
        int itemIndex = instance.getQualityFeature(itemDimension);
        // The score becomes the mark of the instance.
        instance.setQuantityMark(instance.getQuantityFeature(scoreDimension));
        movieUsers.get(userIndex).click(itemIndex);
    }

    return module;
}
 
Example #20
Source Project: customized-symspell   Author: MighTguY   File: SymSpellSearchBenchMark.java    License: MIT License 5 votes vote down vote up
/**
 * Loads a space-delimited classpath resource and adds one dictionary item per record.
 * <p>
 * The first column is passed as the item's first argument and the second column, parsed
 * as a double, as its second. The parser is closed via try-with-resources so the
 * resource stream is released even when a record is malformed.
 *
 * @param dataResourceName classpath resource name, resolved through this class's class loader
 * @param dataHolder       holder receiving the items
 * @throws IOException         if the resource cannot be opened or read
 * @throws SpellCheckException declared by the signature; presumably raised by {@code addItem}
 */
private void indexData(String dataResourceName, DataHolder dataHolder)
    throws IOException, SpellCheckException {
  URL resourceUrl = this.getClass().getClassLoader().getResource(dataResourceName);
  try (CSVParser parser = CSVParser
      .parse(resourceUrl, Charset.forName("UTF-8"), CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : parser) {
      dataHolder
          .addItem(new DictionaryItem(csvRecord.get(0), Double.valueOf(csvRecord.get(1)), 0d));
    }
  }
}
 
Example #21
Source Project: customized-symspell   Author: MighTguY   File: SymSpellIndexBenchMark.java    License: MIT License 5 votes vote down vote up
/**
 * Loads a space-delimited classpath resource and adds one dictionary item per record.
 * <p>
 * The first column is passed as the item's first argument and the second column, parsed
 * as a double, as its second. The parser is closed via try-with-resources so the
 * resource stream is released even when a record is malformed.
 *
 * @param dataResourceName classpath resource name, resolved through this class's class loader
 * @param dataHolder       holder receiving the items
 * @throws IOException         if the resource cannot be opened or read
 * @throws SpellCheckException declared by the signature; presumably raised by {@code addItem}
 */
private void indexData(String dataResourceName, DataHolder dataHolder)
    throws IOException, SpellCheckException {
  URL resourceUrl = this.getClass().getClassLoader().getResource(dataResourceName);
  try (CSVParser parser = CSVParser
      .parse(resourceUrl, Charset.forName("UTF-8"), CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : parser) {
      dataHolder
          .addItem(new DictionaryItem(csvRecord.get(0), Double.valueOf(csvRecord.get(1)), 0d));
    }
  }
}
 
Example #22
Source Project: CQL   Author: CategoricalData   File: ToCsvPragmaInstance.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the CSV format used for export from the given options.
 * <p>
 * Starts from the predefined "Default" format, applies the delimiter, quote and escape
 * characters read from the options, quotes all values and uses no null string.
 *
 * @param op options supplying the delimiter, quote and escape characters
 * @return the configured format
 */
public static CSVFormat getFormat(AqlOptions op) {
	return CSVFormat.valueOf("Default")
			.withDelimiter((Character) op.getOrDefault(AqlOption.csv_field_delim_char))
			.withQuote((Character) op.getOrDefault(AqlOption.csv_quote_char))
			.withEscape((Character) op.getOrDefault(AqlOption.csv_escape_char))
			.withQuoteMode(QuoteMode.ALL)
			.withNullString(null);
}
 
Example #23
Source Project: datacollector   Author: streamsets   File: TestDelimitedCharDataParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses "A,B\na,b" with the header line ignored and list-map records: the parser is
 * expected to start at offset "4", return the single data record with values keyed
 * "0" and "1", and then report end of input with offset "-1".
 */
@Test
public void testParseIgnoreHeaderWithListMap() throws Exception {
  OverrunReader input = new OverrunReader(new StringReader("A,B\na,b"), 1000, true, false);
  DelimitedDataParserSettings parserSettings = DelimitedDataParserSettings.builder()
      .withSkipStartLines(0)
      .withFormat(CSVFormat.DEFAULT)
      .withHeader(CsvHeader.IGNORE_HEADER)
      .withMaxObjectLen(-1)
      .withRecordType(CsvRecordType.LIST_MAP)
      .withParseNull(false)
      .withNullConstant(null)
      .withAllowExtraColumns(false)
      .build();
  DataParser dataParser = new DelimitedCharDataParser(getContext(), "id", input, 0, parserSettings);

  Assert.assertEquals("4", dataParser.getOffset());

  // Only data line: "a,b"
  Record dataRecord = dataParser.parse();
  Assert.assertNotNull(dataRecord);
  Assert.assertEquals("id::4", dataRecord.getHeader().getSourceId());
  Assert.assertEquals("a", dataRecord.get().getValueAsListMap().get("0").getValueAsString());
  Assert.assertEquals("b", dataRecord.get().getValueAsListMap().get("1").getValueAsString());
  Assert.assertEquals("7", dataParser.getOffset());

  // End of input
  Assert.assertNull(dataParser.parse());
  Assert.assertEquals("-1", dataParser.getOffset());
  dataParser.close();
}
 
Example #24
Source Project: hedera-mirror-node   Author: hashgraph   File: PostgresCSVDomainWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a CSV printer that writes to the file {@code transaction} inside the given
 * directory, using {@link CSVFormat#DEFAULT} with the transaction column names as header.
 *
 * @param outputDir directory that will contain the {@code transaction} file
 * @return a printer writing to that file
 * @throws IOException if the file cannot be opened for writing or the printer cannot be created
 */
private static CSVPrinter getTransactionsCSVPrinter(String outputDir) throws IOException {
    final Appendable target = Files.newBufferedWriter(Paths.get(outputDir, "transaction"));
    final CSVFormat formatWithHeader = CSVFormat.DEFAULT.withHeader(
            "node_account_id", "memo", "payer_account_id", "charged_tx_fee", "initial_balance", "entity_id",
            "valid_start_ns", "consensus_ns", "valid_duration_seconds", "max_fee", "transaction_hash",
            "result", "type", "transaction_bytes");
    return new CSVPrinter(target, formatWithHeader);
}
 
Example #25
Source Project: hmftools   Author: hartwigmedical   File: WideEcrfFileReader.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads all records of a CSV file and returns them without the first (header) record.
 * <p>
 * The reader and the parser are both closed via try-with-resources; the returned records
 * are already fully materialized, so they remain usable afterwards. A file without any
 * record yields an empty list instead of failing in {@code subList}.
 *
 * @param pathToCsv path of the CSV file
 * @param delimiter field delimiter used in the file
 * @return every record after the first one, in file order
 * @throws IOException if the file cannot be opened or parsed
 */
@NotNull
private static List<CSVRecord> readCsvSkipHeader(@NotNull String pathToCsv, char delimiter) throws IOException {
    CSVFormat format = CSVFormat.DEFAULT.withDelimiter(delimiter);
    // NOTE(review): InputStreamReader without a charset uses the platform default; confirm the expected encoding.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(pathToCsv)));
            CSVParser parser = format.parse(reader)) {
        List<CSVRecord> records = parser.getRecords();
        if (records.isEmpty()) {
            return records;
        }
        return records.subList(1, records.size());
    }
}
 
Example #26
Source Project: WhiteRabbit   Author: OHDSI   File: ConceptsMap.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads concept hints from a CSV classpath resource and stores them by table and field name.
 * <p>
 * The columns {@code omop_cdm_table}, {@code omop_cdm_field}, {@code concept_id},
 * {@code concept_name} and {@code standard_concept} are always read; {@code domain_id},
 * {@code vocabulary_id} and {@code concept_class_id} are read only when set in the record.
 * A missing resource is reported as an {@link IOException} rather than a
 * {@link NullPointerException}, and the rethrown exception keeps the original as its cause.
 *
 * @param filename resource name, resolved relative to {@code Database.class}
 * @throws IOException if the resource is missing or cannot be read
 */
private void load(String filename) throws IOException{
    try (InputStream conceptStream = Database.class.getResourceAsStream(filename)) {
        if (conceptStream == null) {
            throw new IOException("Resource not found: " + filename);
        }
        // NOTE(review): InputStreamReader without a charset uses the platform default; confirm the expected encoding.
        for (CSVRecord conceptRow : CSVFormat.RFC4180.withHeader().parse(new InputStreamReader(conceptStream))) {
            String omopTableName = conceptRow.get("omop_cdm_table");
            String omopFieldName = conceptRow.get("omop_cdm_field");

            Concept concept = new Concept();
            concept.setConceptId(conceptRow.get("concept_id"));
            concept.setConceptName(conceptRow.get("concept_name"));
            concept.setStandardConcept(conceptRow.get("standard_concept"));

            // Optional fields
            if (conceptRow.isSet("domain_id")) {
                concept.setDomainId(conceptRow.get("domain_id"));
            }

            if (conceptRow.isSet("vocabulary_id")) {
                concept.setVocabularyId(conceptRow.get("vocabulary_id"));
            }

            if (conceptRow.isSet("concept_class_id")) {
                concept.setConceptClassId(conceptRow.get("concept_class_id"));
            }

            this.put(omopTableName, omopFieldName, concept);
        }
    } catch (IOException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException("Could not load concept_id hints: " + e.getMessage(), e);
    }
}
 
Example #27
Source Project: datacollector   Author: streamsets   File: TestDelimitedCharDataParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses "A,B\na,b" with the header line ignored and list records: the parser is
 * expected to start at offset "4", return the single data record without header fields,
 * and then report end of input with offset "-1".
 */
@Test
public void testParseIgnoreHeader() throws Exception {
  OverrunReader input = new OverrunReader(new StringReader("A,B\na,b"), 1000, true, false);
  DelimitedDataParserSettings parserSettings = DelimitedDataParserSettings.builder()
      .withSkipStartLines(0)
      .withFormat(CSVFormat.DEFAULT)
      .withHeader(CsvHeader.IGNORE_HEADER)
      .withMaxObjectLen(-1)
      .withRecordType(CsvRecordType.LIST)
      .withParseNull(false)
      .withNullConstant(null)
      .withAllowExtraColumns(false)
      .build();
  DataParser dataParser = new DelimitedCharDataParser(getContext(), "id", input, 0, parserSettings);

  Assert.assertEquals("4", dataParser.getOffset());

  // Only data line: "a,b"
  Record dataRecord = dataParser.parse();
  Assert.assertNotNull(dataRecord);
  Assert.assertEquals("id::4", dataRecord.getHeader().getSourceId());
  Assert.assertEquals("a", dataRecord.get().getValueAsList().get(0).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(dataRecord.has("[0]/header"));
  Assert.assertEquals("b", dataRecord.get().getValueAsList().get(1).getValueAsMap().get("value").getValueAsString());
  Assert.assertFalse(dataRecord.has("[1]/header"));
  Assert.assertEquals("7", dataParser.getOffset());

  // End of input
  Assert.assertNull(dataParser.parse());
  Assert.assertEquals("-1", dataParser.getOffset());
  dataParser.close();
}
 
Example #28
Source Project: connector-sdk   Author: google-cloudsearch   File: CSVFileManager.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the CSV format to use from configuration values.
 * <p>
 * An empty format name selects {@link CSVFormat#DEFAULT} with surrounding spaces ignored;
 * otherwise the name is matched case-insensitively against the predefined formats. The
 * format is then passed through {@code applyCsvFormatMethods} and the header is configured
 * from {@code csvColumns}, or read from the file when no columns are given.
 *
 * @param csvFormatName name of a predefined format, or empty for the default
 * @param csvColumns    explicit column names; may be empty
 * @param skipHeader    whether the header record is skipped; only allowed with explicit columns
 * @return the fully configured format
 * @throws InvalidConfigurationException if the format name matches no predefined format
 */
private static CSVFormat createCsvFormat(String csvFormatName, List<String> csvColumns,
  boolean skipHeader) {
  CSVFormat baseFormat;
  if (csvFormatName.isEmpty()) {
    baseFormat = CSVFormat.DEFAULT.withIgnoreSurroundingSpaces();
  } else {
    Set<CSVFormat.Predefined> knownFormats = getPredefinedCsvFormats();
    baseFormat = knownFormats.stream()
        .filter(candidate -> candidate.toString().equalsIgnoreCase(csvFormatName))
        .map(CSVFormat.Predefined::getFormat)
        .findFirst()
        .orElseThrow(() -> new InvalidConfigurationException(
            "Invalid CSVFormat " + csvFormatName + ", must be one of " + knownFormats));
  }

  CSVFormat configuredFormat = applyCsvFormatMethods(baseFormat);
  if (!csvColumns.isEmpty()) {
    return configuredFormat
        .withHeader(csvColumns.toArray(new String[0]))
        .withSkipHeaderRecord(skipHeader);
  }

  // Without explicit columns the header is taken from the file, so it cannot be skipped.
  checkState(
      !skipHeader,
      "csv.csvColumns property must be specified "
          + "if csv.skipHeaderRecord is true");
  return configuredFormat.withHeader();
}
 
Example #29
Source Project: beam   Author: apache   File: BeamKafkaCSVTableTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds two CSV lines (one with a quoted value) through the key/value conversion and the
 * CSV decoder, and asserts that the pipeline yields ROW1 and ROW2 in any order.
 */
@Test
public void testCsvRecorderDecoder() {
  PCollection<Row> decodedRows = pipeline
      .apply(Create.of("1,\"1\",1.0", "2,2,2.0"))
      .apply(ParDo.of(new String2KvBytes()))
      .apply(new BeamKafkaCSVTable.CsvRecorderDecoder(genSchema(), CSVFormat.DEFAULT));

  PAssert.that(decodedRows).containsInAnyOrder(ROW1, ROW2);

  pipeline.run();
}
 
Example #30
Source Project: datacollector   Author: streamsets   File: TestCsvParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a parser whose last constructor argument is 2 over input that starts with the
 * lines "foo" and "bar", and checks that reading begins at position 9, that the three
 * remaining records are returned in order, and that the reader position stays at 30
 * once the input is exhausted.
 */
@Test
public void testSkipLines() throws Exception {
  CsvParser csvParser = new CsvParser(
      new CountingReader(new StringReader("foo\nbar\r\na,b,c\naa,bb,cc\ne,f,g\n")),
      CSVFormat.DEFAULT.withHeader((String[])null).withSkipHeaderRecord(false),
      -1,
      0,
      2
  );
  try {
    Assert.assertEquals(9, csvParser.getReaderPosition());

    String[] firstRow = csvParser.read();
    Assert.assertEquals(15, csvParser.getReaderPosition());
    Assert.assertNotNull(firstRow);
    Assert.assertArrayEquals(new String[]{"a", "b", "c"}, firstRow);

    String[] secondRow = csvParser.read();
    Assert.assertNotNull(secondRow);
    Assert.assertArrayEquals(new String[]{"aa", "bb", "cc"}, secondRow);
    Assert.assertEquals(24, csvParser.getReaderPosition());

    String[] thirdRow = csvParser.read();
    Assert.assertNotNull(thirdRow);
    Assert.assertArrayEquals(new String[]{"e", "f", "g"}, thirdRow);
    Assert.assertEquals(30, csvParser.getReaderPosition());

    // End of input: no record, position unchanged.
    Assert.assertNull(csvParser.read());
    Assert.assertEquals(30, csvParser.getReaderPosition());
  } finally {
    csvParser.close();
  }
}