Java Code Examples for org.apache.commons.csv.CSVParser#parse()

The following examples show how to use org.apache.commons.csv.CSVParser#parse(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TreatmentCurator.java    From hmftools with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Reads drug entries from a CSV mapping stream whose first record is the header.
 *
 * <p>The name, type, synonyms and mechanism columns are looked up by header name and trimmed.
 * The synonyms column is itself parsed as CSV content, and each resulting value is trimmed.
 *
 * @param mappingInputStream CSV content; this method does not close it
 * @return one entry per data record, in the order the records were read
 * @throws IOException if the mapping or a synonyms field cannot be parsed
 */
@NotNull
private static List<DrugEntry> readEntries(@NotNull InputStream mappingInputStream) throws IOException {
    List<DrugEntry> drugEntries = Lists.newArrayList();
    // Deliberately not wrapped in try-with-resources: closing the parser would also close
    // the stream handed in by the caller.
    CSVParser parser = CSVParser.parse(mappingInputStream, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    for (CSVRecord record : parser) {
        String canonicalName = record.get(DRUG_NAME_CSV_FIELD).trim();
        String drugType = record.get(DRUG_TYPE_CSV_FIELD).trim();
        String synonymsField = record.get(DRUG_SYNONYMS_CSV_FIELD).trim();
        // Trim the value, as for the other columns; previously the column-name constant was trimmed.
        String treatmentMechanism = record.get(DRUG_MECHANISM_CSV_FILE).trim();

        List<String> synonyms = Lists.newArrayList();
        if (!synonymsField.isEmpty()) {
            try (CSVParser synonymsParser = CSVParser.parse(synonymsField, CSVFormat.DEFAULT)) {
                for (CSVRecord synonymsRecord : synonymsParser) {
                    for (String synonym : synonymsRecord) {
                        synonyms.add(synonym.trim());
                    }
                }
            }
        }
        drugEntries.add(ImmutableDrugEntry.of(canonicalName, synonyms, drugType, treatmentMechanism));
    }
    return drugEntries;
}
 
Example 2
Source File: SymSpellSearchBenchMark.java    From customized-symspell with MIT License 7 votes vote down vote up
/**
 * Collects the first field of every record in a space-delimited classpath resource.
 *
 * @param queryFile resource name, resolved through this class's class loader
 * @return the first field of each record read; if an {@link IOException} is caught it is
 *         printed to standard error and whatever was collected so far is returned
 */
private List<String> readQueries(String queryFile) {
  List<String> queries = new ArrayList<>();
  URL queryResourceUrl = this.getClass().getClassLoader().getResource(queryFile);
  // try-with-resources closes the parser, and with it the stream opened on the URL.
  try (CSVParser qparser = CSVParser
      .parse(queryResourceUrl, Charset.forName("UTF-8"),
          CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : qparser) {
      queries.add(csvRecord.get(0));
    }
  } catch (IOException ex) {
    System.err.println("Error occured " + ex);
  }
  return queries;
}
 
Example 3
Source File: SchemaResource.java    From macrobase with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the column schema for the request's base query.
 *
 * <p>If the base query contains {@code csv://}, the remainder is treated as a file path and the
 * schema is built from that CSV file's header, every column being given the type
 * {@code "entry"}. Otherwise the schema is obtained from the SQL ingester.
 *
 * @param request the schema request; a {@code null} base query yields an error response
 * @return a response carrying either the schema or an error message; exceptions are logged and
 *         their stack trace is returned in {@code errorMessage} rather than rethrown
 */
@PUT
@Consumes(MediaType.APPLICATION_JSON)
public SchemaResponse getSchema(SchemaRequest request) {
    SchemaResponse response = new SchemaResponse();

    if (request.baseQuery == null) {
        response.errorMessage = "Please enter a base query";
        return response;
    }

    try {
        // temp hack to enable CSV loading
        if(request.baseQuery.contains("csv://")) {
            // NOTE(review): the path comes straight from the request body — confirm that
            // callers are trusted, otherwise this can read arbitrary local files.
            File csvFile = new File(request.baseQuery.replace("csv://", ""));
            Schema s = new Schema(new ArrayList<>());
            // Close the parser so the underlying file handle is released after reading the header.
            try (CSVParser p = CSVParser.parse(csvFile, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
                for(String header : p.getHeaderMap().keySet()) {
                    s.getColumns().add(new Schema.SchemaColumn(header, "entry"));
                }
            }
            response.schema = s;
        } else {
            conf.set(MacroBaseConf.DB_URL, request.pgUrl);
            conf.set(MacroBaseConf.BASE_QUERY, request.baseQuery);
            response.schema = ((SQLIngester) getLoader()).getSchema(request.baseQuery);
        }
    } catch (Exception e) {
        log.error("An error occurred while processing a request:", e);
        response.errorMessage = ExceptionUtils.getStackTrace(e);
    }

    return response;
}
 
Example 4
Source File: CSVFileResultHandler.java    From phoenix with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the result file named by {@code resultFileName} back into {@link Result} objects.
 *
 * <p>The first record is kept (as its {@code toString()} text) as the header. The text of every
 * later record is split on {@code PherfConstants.RESULT_FILE_DELIMETER} into result values.
 *
 * @return one {@link Result} per non-header record, in file order
 * @throws IOException if the file cannot be read or parsed
 */
public synchronized List<Result> read() throws IOException {
    util.ensureBaseResultDirExists();
    File source = new File(resultFileName);
    try (CSVParser csv = CSVParser.parse(source, Charset.defaultCharset(), CSVFormat.DEFAULT)) {
        List<CSVRecord> rows = csv.getRecords();
        List<Result> parsed = new ArrayList<>();
        String headerText = null;
        for (CSVRecord row : rows) {
            if (row.getRecordNumber() == 1) {
                // The first record is the CSV header, remembered for all following results.
                headerText = row.toString();
            } else {
                List<ResultValue> values = new ArrayList<>();
                String[] tokens = row.toString().split(PherfConstants.RESULT_FILE_DELIMETER);
                for (String token : tokens) {
                    values.add(new ResultValue(token));
                }
                parsed.add(new Result(resultFileDetails, headerText, values));
            }
        }
        return parsed;
    }
}
 
Example 5
Source File: PrimaryTumorToDOIDMapper.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a mapper from the CSV referenced by {@code TUMOR_LOCATION_MAPPING_CSV}.
 *
 * <p>The CSV must have a header containing the columns {@code primaryTumorLocation} and
 * {@code doids}; each record maps a tumor location to the set produced by {@code toSet(doids)}.
 * A later record with the same location replaces the earlier mapping.
 *
 * @return a mapper backed by the parsed location-to-DOIDs map
 * @throws IOException if the CSV cannot be read or parsed
 */
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    Map<String, Set<String>> doidsPerPrimaryTumor = Maps.newHashMap();
    // Close the parser once all records are consumed so the underlying source is released.
    try (CSVParser parser = CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
        for (final CSVRecord record : parser) {
            final String primaryTumorLocation = record.get("primaryTumorLocation");
            final String doids = record.get("doids");

            doidsPerPrimaryTumor.put(primaryTumorLocation, toSet(doids));
        }
    }

    return new PrimaryTumorToDOIDMapper(doidsPerPrimaryTumor);
}
 
Example 6
Source File: HttpClient.java    From rs-api with ISC License 5 votes vote down vote up
/**
 * Parses the text obtained via {@code stringFrom(url)} as CSV, using {@code CSV_FORMAT}, and
 * returns all of its records as an {@link ImmutableList}.
 * @param url The URL whose content is parsed; must not be {@code null}.
 * @return An {@link ImmutableList} holding every parsed {@link CSVRecord}.
 * @throws IOException If an I/O error occurs.
 */
@Override
public ImmutableList<CSVRecord> fromCSV(String url) throws IOException {
	Preconditions.checkNotNull(url);

	final String csvText = stringFrom(url);
	try (CSVParser csv = CSVParser.parse(csvText, CSV_FORMAT)) {
		final ImmutableList<CSVRecord> snapshot = ImmutableList.copyOf(csv.getRecords());
		return snapshot;
	}
}
 
Example 7
Source File: CSVReader.java    From fastjgame with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code file} as a default-format CSV, stores the parser in the {@code parser} field and
 * returns an iterator over its records.
 *
 * @param file       the CSV file to read
 * @param sheetIndex must be 0, since a CSV file has only one sheet
 * @return an iterator over the file's records
 * @throws IOException              if the file cannot be opened
 * @throws IllegalArgumentException if {@code sheetIndex} is not 0
 */
@Override
protected Iterator<CSVRecord> toIterator(File file, int sheetIndex) throws IOException {
    if (sheetIndex == 0) {
        parser = CSVParser.parse(file, charset, CSVFormat.DEFAULT);
        final Iterator<CSVRecord> records = parser.iterator();
        return records;
    } else {
        throw new IllegalArgumentException("csv reader only support sheetIndex 0");
    }
}
 
Example 8
Source File: PatientTumorLocation.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads patient tumor locations from the CSV file at {@code filePath}.
 *
 * <p>Column names come from the {@code Header} enum and the file's own header record is skipped.
 *
 * @param filePath path of the CSV file to read
 * @return one {@link PatientTumorLocation} per data record, in file order
 * @throws IOException if the file cannot be opened or closed
 */
@NotNull
public static List<PatientTumorLocation> readRecords(@NotNull String filePath) throws IOException {
    // The stream is fully collected inside the try block, so closing the parser afterwards is
    // safe and releases the file handle.
    try (CSVParser parser = CSVParser.parse(new File(filePath),
            Charset.defaultCharset(),
            CSVFormat.DEFAULT.withHeader(Header.class).withSkipHeaderRecord())) {
        return StreamSupport.stream(parser.spliterator(), false)
                .map(record -> ImmutablePatientTumorLocation.of(record.get(Header.patientIdentifier),
                        record.get(Header.primaryTumorLocation),
                        record.get(Header.cancerSubtype)))
                .collect(Collectors.toList());
    }
}
 
Example 9
Source File: CSVUtils.java    From TomboloDigitalConnector with MIT License 5 votes vote down vote up
/**
 * Runs every extractor against each line of {@code localFile} and saves the extracted values.
 *
 * <p>Each line is parsed on its own as one default-format CSV record. That record is handed to
 * whichever of the extractor's four sub-extractors are {@code CSVExtractor}s, then the extractor
 * is asked for a timed value. Lines that contain no record (for example blank lines) are
 * skipped. All collected values are passed to the importer in a single call at the end.
 *
 * @param extractors extractors to apply to every line, in order
 * @param importer   receives the buffered values via {@code saveAndClearTimedValueBuffer}
 * @param localFile  the CSV file to read line by line
 * @throws IOException        if the file cannot be read or a line cannot be parsed
 * @throws ExtractorException declared for callers; extraction failures raised inside the loop
 *                            are handled there and not rethrown
 */
public static void extractAndSaveTimedValues(List<TimedValueExtractor> extractors, Importer importer, File localFile)
        throws IOException, ExtractorException {

    List<TimedValue> timedValueBuffer = new ArrayList<>();
    // try-with-resources guarantees the reader is closed even when parsing fails mid-file.
    try (BufferedReader br = new BufferedReader(new FileReader(localFile))) {
        String line;
        while ((line = br.readLine()) != null) {
            List<CSVRecord> records;
            try (CSVParser parser = CSVParser.parse(line, CSVFormat.DEFAULT)) {
                records = parser.getRecords();
            }
            if (records.isEmpty()) {
                // A blank line yields no record; indexing it would throw IndexOutOfBoundsException.
                continue;
            }
            CSVRecord record = records.get(0);
            for (TimedValueExtractor extractor : extractors) {
                if (extractor.getSubjectLabelExtractor() instanceof CSVExtractor)
                    ((CSVExtractor) extractor.getSubjectLabelExtractor()).setCsvRecord(record);
                if (extractor.getAttributeLabelExtractor() instanceof CSVExtractor)
                    ((CSVExtractor) extractor.getAttributeLabelExtractor()).setCsvRecord(record);
                if (extractor.getTimestampExtractor() instanceof CSVExtractor)
                    ((CSVExtractor) extractor.getTimestampExtractor()).setCsvRecord(record);
                if (extractor.getValueExtractor() instanceof CSVExtractor)
                    ((CSVExtractor) extractor.getValueExtractor()).setCsvRecord(record);
                try {
                    timedValueBuffer.add(extractor.extract());
                } catch (UnknownSubjectLabelException e) {
                    // No reason to panic even if Subject does not exist and no reason to run the rest of the extractors
                    // Keep Calm and Break
                    break;
                } catch (ExtractorException ignored) {
                    // Best effort: a value that cannot be extracted is skipped and the
                    // remaining extractors still run for this line.
                }
            }
        }
    }
    importer.saveAndClearTimedValueBuffer(timedValueBuffer);
}
 
Example 10
Source File: CsvReader.java    From zstack with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code content} with the given format and converts every record into a string array
 * holding that record's values in column order.
 *
 * @param content CSV text to parse
 * @param format  CSV dialect to apply
 * @return one array per record, in input order
 * @throws IOException if parsing fails
 */
private List<String[]> getRecords(String content, CSVFormat format) throws IOException {
    List<String[]> rows = new ArrayList<>();
    CSVParser csv = CSVParser.parse(content, format);
    for (CSVRecord csvRow : csv.getRecords()) {
        String[] cells = new String[csvRow.size()];
        int column = 0;
        // Iterating a record yields its values in column order.
        for (String cell : csvRow) {
            cells[column++] = cell;
        }
        rows.add(cells);
    }
    return rows;
}
 
Example 11
Source File: PrimaryTumorToDOIDMapper.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a mapper from the CSV referenced by {@code TUMOR_LOCATION_MAPPING_CSV}.
 *
 * <p>The header must contain the columns {@code primaryTumorLocation} and {@code doids}. For
 * every record the location is mapped to {@code toSet(doids)}; when a location appears more
 * than once, the last record wins.
 *
 * @return a mapper wrapping the parsed location-to-DOIDs map
 * @throws IOException if the CSV cannot be read or parsed
 */
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    Map<String, Set<String>> doidsPerPrimaryTumor = Maps.newHashMap();
    // try-with-resources releases the parser's underlying source after the last record.
    try (CSVParser parser = CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
        for (final CSVRecord record : parser) {
            final String primaryTumorLocation = record.get("primaryTumorLocation");
            final String doids = record.get("doids");

            doidsPerPrimaryTumor.put(primaryTumorLocation, toSet(doids));
        }
    }

    return new PrimaryTumorToDOIDMapper(doidsPerPrimaryTumor);
}
 
Example 12
Source File: SymSpellIndexBenchMark.java    From customized-symspell with MIT License 5 votes vote down vote up
/**
 * Adds one dictionary item to {@code dataHolder} for every record of a space-delimited
 * classpath resource. Field 0 is passed as the item's first argument and field 1, parsed as a
 * double, as its second; the third argument is always {@code 0d}.
 *
 * @param dataResourceName resource name, resolved through this class's class loader
 * @param dataHolder       receives the created items
 * @throws IOException         if the resource cannot be opened or closed
 * @throws SpellCheckException declared for callers; presumably raised by {@code addItem}
 */
private void indexData(String dataResourceName, DataHolder dataHolder)
    throws IOException, SpellCheckException {
  URL resourceUrl = this.getClass().getClassLoader().getResource(dataResourceName);
  // Close the parser (and the stream opened on the URL) once all records are indexed.
  try (CSVParser parser = CSVParser
      .parse(resourceUrl, Charset.forName("UTF-8"), CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : parser) {
      dataHolder
          .addItem(new DictionaryItem(csvRecord.get(0), Double.valueOf(csvRecord.get(1)), 0d));
    }
  }
}
 
Example 13
Source File: SymSpellSearchBenchMark.java    From customized-symspell with MIT License 5 votes vote down vote up
/**
 * Indexes a space-delimited classpath resource into {@code dataHolder}: each record becomes a
 * dictionary item built from field 0, field 1 parsed as a double, and a constant {@code 0d}.
 *
 * @param dataResourceName resource name, resolved through this class's class loader
 * @param dataHolder       receives the created items
 * @throws IOException         if the resource cannot be opened or closed
 * @throws SpellCheckException declared for callers; presumably raised by {@code addItem}
 */
private void indexData(String dataResourceName, DataHolder dataHolder)
    throws IOException, SpellCheckException {
  URL resourceUrl = this.getClass().getClassLoader().getResource(dataResourceName);
  // try-with-resources releases the stream opened on the resource URL.
  try (CSVParser parser = CSVParser
      .parse(resourceUrl, Charset.forName("UTF-8"), CSVFormat.DEFAULT.withDelimiter(' '))) {
    for (CSVRecord csvRecord : parser) {
      dataHolder
          .addItem(new DictionaryItem(csvRecord.get(0), Double.valueOf(csvRecord.get(1)), 0d));
    }
  }
}
 
Example 14
Source File: GeographyCode.java    From MyBox with Apache License 2.0 4 votes vote down vote up
/**
 * Imports geography codes from a comma-delimited UTF-8 CSV file whose first record is the header.
 *
 * <p>For every record a code is built with {@code GeographyCode.readIntenalRecord}. The gcid
 * lookup statement then decides whether the row is updated (a match exists) or inserted. All
 * rows are written with auto-commit switched off and committed once after the last record.
 * Progress is pushed to {@code loading} when it is non-null: every 20th successful import and
 * every failure.
 *
 * <p>Any exception is logged at debug level and not rethrown.
 * NOTE(review): auto-commit is not switched back on and no rollback is issued here — confirm
 * that callers handle this.
 *
 * @param conn       database connection to write through
 * @param loading    optional progress sink; may be {@code null}
 * @param file       CSV file to import
 * @param predefined flag stored on every imported code
 */
public static void importInternalCSV(Connection conn, LoadingController loading, File file, boolean predefined) {
    long importCount = 0, insertCount = 0, updateCount = 0, failedCount = 0;
    try ( CSVParser parser = CSVParser.parse(file, StandardCharsets.UTF_8,
            CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(',').withTrim().withNullString(""))) {
        conn.setAutoCommit(false);
        List<String> names = parser.getHeaderNames();
        if (loading != null) {
            loading.setInfo(message("Importing") + " " + file.getAbsolutePath());
        }
        try ( PreparedStatement gcidQeury = conn.prepareStatement(TableGeographyCode.GCidQeury);
                 PreparedStatement insert = conn.prepareStatement(TableGeographyCode.Insert);
                 PreparedStatement update = conn.prepareStatement(TableGeographyCode.Update)) {
            gcidQeury.setMaxRows(1);
            for (CSVRecord record : parser) {
                GeographyCode code = GeographyCode.readIntenalRecord(names, record);
                code.setPredefined(predefined);
                gcidQeury.setLong(1, code.getGcid());
                boolean alreadyStored;
                try ( ResultSet results = gcidQeury.executeQuery()) {
                    alreadyStored = results.next();
                }

                // Existing rows are updated, unknown ones inserted.
                boolean written = alreadyStored
                        ? TableGeographyCode.update(conn, update, code)
                        : TableGeographyCode.insert(conn, insert, code);

                if (!written) {
                    ++failedCount;
                    if (loading != null) {
                        loading.setInfo(message("Failed") + ": " + failedCount + " "
                                + code.getLevelCode().getName() + " " + code.getName()
                                + " " + code.getLongitude() + " " + code.getLatitude());
                    }
                    continue;
                }

                String action;
                long actionCount;
                if (alreadyStored) {
                    action = "Update";
                    actionCount = ++updateCount;
                } else {
                    action = "Insert";
                    actionCount = ++insertCount;
                }
                importCount++;
                // Successful rows are only reported every 20th import.
                if (loading != null && (importCount % 20 == 0)) {
                    loading.setInfo(message(action) + ": " + actionCount + " "
                            + code.getLevelCode().getName() + " " + code.getName()
                            + " " + code.getLongitude() + " " + code.getLatitude());
                }
            }
            conn.commit();
        }
    } catch (Exception e) {
        logger.debug(e.toString());
    }
}
 
Example 15
Source File: CSVIngester.java    From macrobase with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the datum stream, first loading the configured CSV input while {@code loaded} is
 * false.
 *
 * <p>The input file name and compression come from the configuration. A GZIP input is read
 * through a {@link GZIPInputStream}; anything else is parsed directly from the file. The header
 * map becomes the schema and each column is registered with the encoder under its 1-based
 * index. Every record is then parsed into a {@code Datum} and added to the stream. Records
 * that raise a {@link NumberFormatException} are counted in {@code badRows} and skipped.
 *
 * <p>NOTE(review): nothing in this method sets {@code loaded}; unless it is set elsewhere,
 * every call re-reads the file and appends to the same stream — confirm.
 *
 * @return the stream holding all successfully parsed rows
 * @throws Exception if the input cannot be opened or parsed
 */
@Override
public MBStream<Datum> getStream() throws Exception {
    if(!loaded) {
        filename = conf.getString(MacroBaseConf.CSV_INPUT_FILE);
        Compression compression = conf.getCsvCompression();

        if (compression == Compression.GZIP) {
            InputStream fileStream = new FileInputStream(filename);
            try {
                InputStream gzipStream = new GZIPInputStream(fileStream);
                Reader decoder = new InputStreamReader(gzipStream);
                csvParser = new CSVParser(decoder, CSVFormat.DEFAULT.withHeader());
            } catch (Exception e) {
                // The parser never took ownership of the file stream (e.g. the file is not
                // valid GZIP), so close it here instead of leaking the handle.
                try {
                    fileStream.close();
                } catch (Exception closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        } else {
            File csvFile = new File(conf.getString(MacroBaseConf.CSV_INPUT_FILE));
            csvParser = CSVParser.parse(csvFile, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
        }
        schema = csvParser.getHeaderMap(); //equal to resultSet.getmetadata or smt

        for (Map.Entry<String, Integer> se : schema.entrySet()) {
            conf.getEncoder().recordAttributeName(se.getValue() + 1, se.getKey()); //numbering off each column for encoding
        }

        // Load all records into memory to filter out rows with missing data
        Iterator<CSVRecord> rawIterator = csvParser.iterator();

        int numRows = 0;
        while (rawIterator.hasNext()) {
            try {
                CSVRecord record = rawIterator.next();
                Datum curRow = parseRecord(record);
                dataStream.add(curRow);
                numRows++;
            } catch (NumberFormatException e) {
                badRows++;
            }
        }
        log.info("{}/{} rows successfully parsed ({} malformed rows)", numRows, numRows + badRows, badRows);
    }

    return dataStream;
}
 
Example 16
Source File: ArgsTableBuilder.java    From dockerflow with Apache License 2.0 4 votes vote down vote up
/**
 * Load the workflow arguments from a CSV file. The header of the CSV contains the input or output
 * parameter names. Each row contains the workflow args for a single run. To run 100 instances of
 * a workflow concurrently, create a CSV with a header row plus 100 rows for each set of
 * parameters.
 *
 * <p>Columns by default are input parameters, passed as environment variables to the Docker
 * script. For file parameters, you can prefix the column header with "<" for input or ">" for
 * output. For clarity, you can also prefix the regular input parameters as "<", if you like.
 *
 * <p>The column header can also be "logging", which is a reserved name for the logging path.
 * The workspace, memory, disk-size and cpu headers are likewise routed to their dedicated
 * builder settings rather than being treated as inputs.
 *
 * @param csvFile CSV file (RFC4180) that's local or in GCS
 * @return a map with the key being the clientId, in row order
 * @throws IOException if the file cannot be read or parsed
 */
static Map<String, WorkflowArgs> loadCsv(String csvFile) throws IOException {
  Map<String, WorkflowArgs> retval = new LinkedHashMap<String, WorkflowArgs>();

  String csv = FileUtils.readAll(csvFile);

  // Parse header
  List<String> header = null;

  int row = 0;

  try (CSVParser parser = CSVParser.parse(csv, CSVFormat.RFC4180)) {
    // Parse by row
    for (CSVRecord csvRecord : parser) {
      ArgsBuilder args = ArgsBuilder.of(String.valueOf(row));

      LOG.debug(StringUtils.toJson(csvRecord));

      // Parse header the first time
      if (row == 0) {
        header = new ArrayList<String>();
        for (String col : csvRecord) {
          header.add(col);
        }
      } else {
        // Set parameter defined in each column
        for (int col = 0; col < header.size(); ++col) {
          String name = header.get(col);
          String val = csvRecord.get(col);

          // Prefixes are removed with substring so that only the leading marker is dropped;
          // String.replace would also strip later occurrences inside the name.
          if (name.startsWith(PREFIX_INPUT)) {
            args.input(name.substring(PREFIX_INPUT.length()), val);
          } else if (name.startsWith(INPUTS + "=")) {
            args.input(name.substring((INPUTS + "=").length()), val);
          } else if (name.startsWith(INPUTS_FROM_FILE + "=")) {
            args.inputFromFile(name.substring((INPUTS_FROM_FILE + "=").length()), val);
          } else if (name.startsWith(PREFIX_OUTPUT)) {
            args.output(name.substring(PREFIX_OUTPUT.length()), val);
          } else if (name.startsWith(OUTPUTS + "=")) {
            // Output columns must be registered as outputs; this branch used to call input().
            args.output(name.substring((OUTPUTS + "=").length()), val);
          } else if (LOGGING.equals(name)) {
            args.logging(val);
          } else if (WORKSPACE.equals(name)) {
            args.workspace(val);
          } else if (MEMORY.equals(name)) {
            args.memory(val);
          } else if (DISK_SIZE.equals(name)) {
            args.diskSize(val);
          } else if (CPU.equals(name)) {
            args.cpu(Integer.parseInt(val));
          } else {
            args.input(name, val);
          }
        }
        WorkflowArgs a = args.build();
        a.setRunIndex(row);
        retval.put(a.getClientId(), a);
      }
      ++row;
    }
  }
  return retval;
}
 
Example 17
Source File: CSVExporterTest.java    From TomboloDigitalConnector with MIT License 4 votes vote down vote up
/**
 * Parses {@code csvString} with the default format, taking the header from the first record,
 * and returns all parsed records.
 */
private List<CSVRecord> getRecords(String csvString) throws IOException {
	final CSVFormat headerAwareFormat = CSVFormat.DEFAULT.withHeader();
	return CSVParser.parse(csvString, headerAwareFormat).getRecords();
}
 
Example 18
Source File: TestDataProvider.java    From preDict with GNU Lesser General Public License v3.0 3 votes vote down vote up
/**
 * Loads test data from a colon-delimited CSV resource with the following columns:
 * 
 * <pre>
 * 0 = correct word
 * 1 = true if this is a desired match,
 *     false if this is a false-positive match
 * 2 = comma separated list of similar word
 * </pre>
 * 
 * Column 0 is added to {@code baseWords} and every entry of column 2 to {@code queries};
 * column 1 is not read here.
 * 
 * @param resourceName name of the resource, resolved through this class's class loader
 * @throws IOException if the resource cannot be opened or closed
 */
public TestDataProvider(String resourceName) throws IOException {
	URL resourceUrl = this.getClass().getClassLoader().getResource(resourceName);
	// try-with-resources closes the parser and the stream opened on the resource URL.
	try (CSVParser parser = CSVParser.parse(resourceUrl, Charset.forName("UTF-8"), CSVFormat.DEFAULT.withDelimiter(':'))) {
		for (CSVRecord csvRecord : parser) {
			baseWords.add(csvRecord.get(0));
			queries.addAll(Arrays.asList(csvRecord.get(2).split(",")));
		}
	}
}
 
Example 19
Source File: CSVW.java    From rmlmapper-java with MIT License 2 votes vote down vote up
/**
 * Creates a new parser over {@code inputStream}, decoding it with {@code csvCharset} and
 * applying {@code csvFormat}.
 * @return the newly created CSVParser.
 * @throws IOException if the parser cannot be created
 */
CSVParser getCSVParser() throws IOException {
    final CSVParser csvParser = CSVParser.parse(inputStream, csvCharset, csvFormat);
    return csvParser;
}
 
Example 20
Source File: CSVCommonsLoader.java    From phoenix with Apache License 2.0 2 votes vote down vote up
/**
 * Upserts data from CSV file.
 *
 * Data is batched up based on connection batch size.
 * Column PDataType is read from metadata and is used to convert
 * column value to correct type before upsert.
 *
 * The constructor determines the format for the CSV files.
 *
 * The file is read as UTF-8, and the parser opened on it is closed when the upsert
 * finishes, whether normally or with an exception.
 *
 * @param fileName path of the CSV file to load
 * @throws Exception if the file cannot be opened or the upsert fails
 */
public void upsert(String fileName) throws Exception {
    // try-with-resources releases the file handle even when the delegated upsert throws.
    try (CSVParser parser = CSVParser.parse(new File(fileName), Charsets.UTF_8, format)) {
        upsert(parser);
    }
}