org.apache.commons.csv.CSVParser Java Examples

The following examples show how to use org.apache.commons.csv.CSVParser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CsvUtils.java    From webtau with Apache License 2.0 8 votes vote down vote up
/**
 * Creates a parser over CSV text using the RFC 4180 format with surrounding
 * spaces ignored, empty lines skipped, values trimmed and a comma delimiter.
 * When {@code header} is empty, the first record of the content is used as the header.
 *
 * @param header explicit header names; only its emptiness is inspected here
 * @param content CSV text to parse
 * @return a parser reading from {@code content}
 * @throws RuntimeException wrapping any {@link IOException} raised while creating the parser
 */
private static CSVParser readCsvRecords(List<String> header, String content) {
    CSVFormat baseFormat = header.isEmpty()
            ? CSVFormat.RFC4180.withFirstRecordAsHeader()
            : CSVFormat.RFC4180;
    CSVFormat effectiveFormat = baseFormat
            .withIgnoreSurroundingSpaces()
            .withIgnoreEmptyLines()
            .withTrim()
            .withDelimiter(',');

    try {
        return effectiveFormat.parse(new StringReader(content));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example #2
Source File: TreatmentCurator.java    From hmftools with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Reads drug entries from a CSV stream whose first record is the header.
 * <p>
 * The name, type, synonyms and mechanism columns are read per record and trimmed.
 * The synonyms cell is itself parsed as CSV, and every value found in it is trimmed
 * and added as a synonym.
 *
 * @param mappingInputStream stream containing the drug mapping CSV, decoded with the platform default charset
 * @return one {@link DrugEntry} per CSV record, in file order
 * @throws IOException if the stream cannot be read or parsed
 */
@NotNull
private static List<DrugEntry> readEntries(@NotNull InputStream mappingInputStream) throws IOException {
    List<DrugEntry> drugEntries = Lists.newArrayList();
    // NOTE(review): this parser is deliberately left open; closing it would presumably
    // also close the caller-supplied stream - confirm who owns mappingInputStream.
    CSVParser parser = CSVParser.parse(mappingInputStream, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    for (CSVRecord record : parser) {
        String canonicalName = record.get(DRUG_NAME_CSV_FIELD).trim();
        String drugType = record.get(DRUG_TYPE_CSV_FIELD).trim();
        String synonymsField = record.get(DRUG_SYNONYMS_CSV_FIELD).trim();
        // Trim the cell value, not the column name, so this column is handled like the others.
        String treatmentMechanism = record.get(DRUG_MECHANISM_CSV_FILE).trim();

        List<String> synonyms = Lists.newArrayList();
        if (!synonymsField.isEmpty()) {
            // The nested parser only wraps an in-memory string, so it is safe to close eagerly.
            try (CSVParser synonymsParser = CSVParser.parse(synonymsField, CSVFormat.DEFAULT)) {
                for (CSVRecord synonymsRecord : synonymsParser) {
                    for (String synonym : synonymsRecord) {
                        synonyms.add(synonym.trim());
                    }
                }
            }
        }
        drugEntries.add(ImmutableDrugEntry.of(canonicalName, synonyms, drugType, treatmentMechanism));
    }
    return drugEntries;
}
 
Example #3
Source File: FrameworkUtils.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splits one line of CSV text into an array of exactly {@code len} trimmed values.
 * <p>
 * The first CSV record of {@code val} is used. If it has at most {@code len} fields,
 * they fill the front of the array and any remaining slots stay {@code null}. If it
 * has more than {@code len} fields, the text is split with
 * {@code val.split(",", len)} instead and every part is trimmed.
 *
 * @param val the CSV text to split
 * @param len the number of slots in the result
 * @return the trimmed values
 * @throws IOException if the CSV text cannot be parsed
 * @throws IndexOutOfBoundsException if {@code val} contains no CSV record
 */
public static String[] splitStr(String val, Integer len) throws IOException {

    CSVRecord record;
    // try-with-resources closes the parser on every path, including failures.
    try (CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT)) {
        record = parser.getRecords().get(0);
    }

    String[] input = new String[len];
    if (record.size() > len) {
        // Too many fields for the requested length: fall back to a plain comma split
        // (checked explicitly rather than by catching ArrayIndexOutOfBoundsException).
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++) {
            input[i] = input[i].trim();
        }
        return input;
    }

    int i = 0;
    for (String value : record) {
        input[i] = value.trim();
        i++;
    }
    return input;
}
 
Example #4
Source File: LicenseStoreData.java    From LicenseScout with Apache License 2.0 6 votes vote down vote up
/**
 * Loads license name mappings from CSV data.
 * <p>
 * The first column of each record is the mapped name; every following column is
 * resolved with {@code getLicenseBySpdxIdentifier}. Identifiers that resolve to
 * {@code null} are reported through {@code log.info} and skipped. The format uses
 * {@code ','} as delimiter and {@code '#'} as comment marker.
 *
 * @param inputStream an input stream to read the file contents from (decoded as UTF-8)
 * @param log the logger
 * @throws IOException if reading or parsing fails
 */
public void readNameMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat mappingFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        for (final CSVRecord row : mappingFormat.parse(reader)) {
            final String name = row.get(0).trim();
            final List<License> resolved = new ArrayList<>();
            // Columns 1..n hold the license identifiers for this name.
            for (int column = 1; column < row.size(); column++) {
                final String identifier = row.get(column).trim();
                final License match = getLicenseBySpdxIdentifier(identifier);
                if (match == null) {
                    log.info("readNameMappings: license identifier not found: " + identifier);
                    continue;
                }
                resolved.add(match);
            }
            nameMappings.put(name, resolved);
        }
    }
}
 
Example #5
Source File: PrimaryTumorToDOIDMapper.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a mapper from the CSV referenced by {@code TUMOR_LOCATION_MAPPING_CSV}.
 * <p>
 * The first record is treated as the header. For each following record the
 * {@code primaryTumorLocation} column becomes the key and the {@code doids}
 * column, converted with {@code toSet}, becomes the value.
 *
 * @return a mapper backed by the parsed mapping
 * @throws IOException if the CSV cannot be read or parsed
 */
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    Map<String, Set<String>> mapping = Maps.newHashMap();
    // NOTE(review): the parser is never closed; whether closing is safe depends on what
    // TUMOR_LOCATION_MAPPING_CSV is (not visible here) - confirm before changing.
    final CSVParser rows =
            CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader());
    for (final CSVRecord row : rows) {
        final String location = row.get("primaryTumorLocation");
        final String doidText = row.get("doids");
        mapping.put(location, toSet(doidText));
    }
    return new PrimaryTumorToDOIDMapper(mapping);
}
 
Example #6
Source File: TestSpreadsheetExtractor.java    From tabula-java with MIT License 6 votes vote down vote up
/**
 * Checks that {@code SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells} finds
 * {@code EXPECTED_RECTANGLES} for the cells listed in the CSV fixture. Each CSV record
 * supplies the four float arguments of one {@code Cell}.
 *
 * @throws IOException if the fixture cannot be read
 */
@Test
public void testFindSpreadsheetsFromCells() throws IOException {

    List<Cell> cells = new ArrayList<>();

    // The parser owns a file handle, so close it as soon as the cells are loaded.
    try (CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
            Charset.forName("utf-8"),
            CSVFormat.DEFAULT)) {
        for (CSVRecord record : parse) {
            cells.add(new Cell(Float.parseFloat(record.get(0)),
                    Float.parseFloat(record.get(1)),
                    Float.parseFloat(record.get(2)),
                    Float.parseFloat(record.get(3))));
        }
    }

    // Both lists are sorted with the same comparator so that equals() compares like with like.
    List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
    Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER);
    List<Rectangle> foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
    Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER);
    assertTrue(foundRectangles.equals(expected));
}
 
Example #7
Source File: CSVDataset.java    From djl-demo with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the dataset, downloading the CSV into {@code dataset/malicious_url_data.csv}
 * first when that file does not exist yet. All records are read into {@code dataset}
 * before the {@code CSVDataset} is constructed from this builder.
 *
 * @return the constructed dataset
 * @throws IOException if the download, the directory creation or the CSV parsing fails
 */
CSVDataset build() throws IOException {
    Path path = Paths.get("dataset");
    Files.createDirectories(path);
    Path csvFile = path.resolve("malicious_url_data.csv");
    if (!Files.exists(csvFile)) {
        logger.info("Downloading dataset file ...");
        URL url =
                new URL(
                        "https://raw.githubusercontent.com/incertum/cyber-matrix-ai/master/Malicious-URL-Detection-Deep-Learning/data/url_data_mega_deep_learning.csv");
        // Files.copy leaves the source stream open, so close the connection explicitly.
        try (java.io.InputStream download = url.openStream()) {
            Files.copy(download, csvFile);
        }
    }

    // NOTE(review): withFirstRecordAsHeader() presumably supersedes the explicit
    // withHeader("url", "isMalicious") - confirm against the Commons CSV docs.
    try (Reader reader = Files.newBufferedReader(csvFile);
            CSVParser csvParser =
                    new CSVParser(
                            reader,
                            CSVFormat.DEFAULT
                                    .withHeader("url", "isMalicious")
                                    .withFirstRecordAsHeader()
                                    .withIgnoreHeaderCase()
                                    .withTrim())) {
        dataset = csvParser.getRecords();
        return new CSVDataset(this);
    }
}
 
Example #8
Source File: LicenseStoreData.java    From LicenseScout with Apache License 2.0 6 votes vote down vote up
/**
 * Loads license URL mappings from CSV data.
 * <p>
 * The first column of each record is the URL; every following column is resolved
 * with {@code getLicenseBySpdxIdentifier}. Identifiers that resolve to {@code null}
 * are reported through {@code log.info} and skipped. The format uses {@code ','} as
 * delimiter and {@code '#'} as comment marker.
 *
 * @param inputStream an input stream to read the file contents from (decoded as UTF-8)
 * @param log the logger
 * @throws IOException if reading or parsing fails
 */
public void readUrlMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat mappingFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        for (final CSVRecord row : mappingFormat.parse(reader)) {
            final String mappedUrl = row.get(0).trim();
            final List<License> resolved = new ArrayList<>();
            // Columns 1..n hold the license identifiers for this URL.
            for (int column = 1; column < row.size(); column++) {
                final String identifier = row.get(column).trim();
                final License match = getLicenseBySpdxIdentifier(identifier);
                if (match == null) {
                    log.info("readUrlMappings: license identifier not found: " + identifier);
                    continue;
                }
                resolved.add(match);
            }
            urlMappings.put(mappedUrl, resolved);
        }
    }
}
 
Example #9
Source File: SchemaResource.java    From macrobase with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the schema for the request's base query.
 * <p>
 * When the base query contains {@code csv://}, the remainder is treated as a file
 * path and the CSV header names become the schema columns (all typed
 * {@code "entry"}). Otherwise the schema is obtained from the SQL ingester. Any
 * exception is logged and reported through {@code response.errorMessage}.
 *
 * @param request the schema request
 * @return the response carrying either a schema or an error message
 */
@PUT
@Consumes(MediaType.APPLICATION_JSON)
public SchemaResponse getSchema(SchemaRequest request) {
    SchemaResponse response = new SchemaResponse();

    if (request.baseQuery == null) {
        response.errorMessage = "Please enter a base query";
        return response;
    }

    try {
        // temp hack to enable CSV loading
        if(request.baseQuery.contains("csv://")) {
            // NOTE(review): the path comes straight from the request and is opened as-is;
            // consider restricting it to an allowed base directory.
            File csvFile = new File(request.baseQuery.replace("csv://", ""));
            Schema s = new Schema(new ArrayList<>());
            // Close the parser so the underlying file handle is released after reading the header.
            try (CSVParser p = CSVParser.parse(csvFile, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
                for(String header : p.getHeaderMap().keySet()) {
                    s.getColumns().add(new Schema.SchemaColumn(header, "entry"));
                }
            }
            response.schema = s;
        } else {
            conf.set(MacroBaseConf.DB_URL, request.pgUrl);
            conf.set(MacroBaseConf.BASE_QUERY, request.baseQuery);
            response.schema = ((SQLIngester) getLoader()).getSchema(request.baseQuery);
        }
    } catch (Exception e) {
        log.error("An error occurred while processing a request:", e);
        response.errorMessage = ExceptionUtils.getStackTrace(e);
    }

    return response;
}
 
Example #10
Source File: DataExportEngineTest.java    From TomboloDigitalConnector with MIT License 6 votes vote down vote up
/**
 * Runs a CSV export for a single subject and checks that the output holds exactly one
 * data record with the expected {@code label} and fraction-of-total value.
 *
 * @throws Exception if the export or the CSV parsing fails
 */
@Test
public void testExportsCSV() throws Exception {
    DataExportSpecificationBuilder csvBuilder = DataExportSpecificationBuilder.withCSVExporter();
    csvBuilder
            .addSubjectSpecification(
                    new SubjectSpecificationBuilder(AbstractONSImporter.PROVIDER.getLabel(), "lsoa").setMatcher("label", "E01002766"))
            .addDatasourceSpecification("uk.org.tombolo.importer.ons.CensusImporter", "qs103ew", "")
            .addFieldSpecification(
                    FieldBuilder.fractionOfTotal("percentage_under_1_years_old_label")
                            .addDividendAttribute("uk.gov.ons", "Age: Age under 1") // number under one year old
                            .setDivisorAttribute("uk.gov.ons", "Age: All categories: Age") // total population
            );

    engine.execute(csvBuilder.build(), writer, emptyImporterMatcher);

    // Parse what the engine wrote; the first line is the header.
    CSVParser exported = CSVParser.parse(writer.toString(), CSVFormat.DEFAULT.withHeader());
    List<CSVRecord> rows = exported.getRecords();

    assertEquals(1, rows.size());
    CSVRecord onlyRow = rows.get(0);
    assertEquals("E01002766", onlyRow.get("label"));
    assertEquals("0.012263099219620958", onlyRow.get("percentage_under_1_years_old_label"));
}
 
Example #11
Source File: RecordStoreUtilities.java    From constellation with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a serialized {@link RecordStore} from an {@link InputStream}. The first
 * row supplies the keys; every later row adds one record whose values are set
 * under the key found at the same column of the first row.
 *
 * @param in An {@link InputStream} pointing to a serialized
 * {@link RecordStore}, decoded as UTF-8.
 * @return The {@link RecordStore} object as loaded from the stream.
 * @throws IOException If there is an issue reading from the stream.
 */
public static RecordStore fromCsv(final InputStream in) throws IOException {
    final RecordStore store = new GraphRecordStore();

    try (final CSVParser parser = CSVFormat.DEFAULT.parse(new InputStreamReader(in, StandardCharsets.UTF_8.name()))) {
        final List<CSVRecord> rows = parser.getRecords();
        // Nothing to add unless there is at least one row after the heading.
        if (rows.size() > 1) {
            final CSVRecord heading = rows.get(0);
            for (final CSVRecord row : rows.subList(1, rows.size())) {
                store.add();
                int column = 0;
                for (final String value : row) {
                    store.set(heading.get(column), value);
                    column++;
                }
            }
        }
    }

    return store;
}
 
Example #12
Source File: HashmodCSVImportFileParser.java    From constellation with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every record of the input as RFC 4180 CSV (decoded as UTF-8) and returns each
 * record as a string array, in file order.
 *
 * @param input the source providing the input stream
 * @param parameters plugin parameters (not read by this method)
 * @return one array of values per CSV record
 * @throws IOException if the input cannot be read or parsed
 */
public List<String[]> parse(final HashmodInputSource input, final PluginParameters parameters) throws IOException {
    final ArrayList<String[]> rows = new ArrayList<>();
    try (final CSVParser parser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8.name()))) {
        for (final CSVRecord record : parser.getRecords()) {
            final String[] cells = new String[record.size()];
            int column = 0;
            for (final String value : record) {
                cells[column] = value;
                column++;
            }
            rows.add(cells);
        }
    }
    return rows;
}
 
Example #13
Source File: HashmodCSVImportFileParser.java    From constellation with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the leading records of the input as string arrays, stopping once
 * {@code limit} rows have been collected. The limit is checked after each row is
 * added, so the first row is always included when the input has one. All records are
 * parsed before the rows are collected.
 *
 * @param input the source providing the input stream (decoded as UTF-8)
 * @param parameters plugin parameters (not read by this method)
 * @param limit the row count at which collection stops
 * @return the collected rows, header row included
 * @throws IOException if the input cannot be read or parsed
 */
public List<String[]> preview(final HashmodInputSource input, final PluginParameters parameters, final int limit) throws IOException {
    // Leave the header on, as the importer expects this as the first entry.
    final ArrayList<String[]> rows = new ArrayList<>();
    try (final CSVParser parser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8.name()))) {
        final List<CSVRecord> records = parser.getRecords();
        for (int index = 0; index < records.size(); index++) {
            final CSVRecord record = records.get(index);
            final String[] cells = new String[record.size()];
            for (int column = 0; column < cells.length; column++) {
                cells[column] = record.get(column);
            }
            rows.add(cells);
            // rows.size() equals the number of records handled so far.
            if (rows.size() >= limit) {
                break;
            }
        }
    }
    return rows;
}
 
Example #14
Source File: MedicineReadWriteExample.java    From tablestore-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Imports meta rows from the CSV file named by {@code conf.getMetaFile()} into the
 * meta table. Each record becomes one {@code TimestreamMeta}: three columns form the
 * identifier, two become date attributes and the remaining five are stored as a JSON
 * list under the {@code extension} attribute. Every stored meta is also printed.
 *
 * @throws IOException if the file cannot be opened, read or parsed
 */
public void importMeta() throws IOException {
    TimestreamMetaTable metaTable = db.metaTable();
    String [] fileHeader = {"分类", "名称", "监管号", "受理号", "生产日期", "有效日期", "注册分类", "申请类型", "企业名称", "任务类型"};
    String csvFile = conf.getMetaFile();
    CSVFormat format = CSVFormat.DEFAULT.withHeader(fileHeader).withIgnoreHeaderCase().withTrim();
    // One serializer is enough for all rows.
    Gson gson = new Gson();
    // Reader and parser are closed even when a row fails to import.
    try (Reader reader = Files.newBufferedReader(Paths.get(csvFile));
            CSVParser csvParser = new CSVParser(reader, format)) {
        for (CSVRecord r : csvParser.getRecords()) {
            TimestreamIdentifier identifier = new TimestreamIdentifier.Builder(r.get("分类"))
                    .addTag("名称", r.get("名称"))
                    .addTag("监管号", r.get("监管号"))
                    .build();
            TimestreamMeta meta = new TimestreamMeta(identifier);

            meta.addAttribute("produced_date", r.get("生产日期"));
            meta.addAttribute("period_of_validity", r.get("有效日期"));

            List<String> extension = new ArrayList<>();
            extension.add("受理号=" + r.get("受理号"));
            extension.add("注册分类=" + r.get("注册分类"));
            extension.add("申请类型=" + r.get("申请类型"));
            extension.add("企业名称=" + r.get("企业名称"));
            extension.add("任务类型=" + r.get("任务类型"));
            meta.addAttribute("extension", gson.toJson(extension));

            metaTable.put(meta);
            System.out.println(meta.toString());
        }
    }
}
 
Example #15
Source File: CsvLoader.java    From timbuctoo with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Feeds every given file to the importer as one collection. The collection name is the
 * tuple's left value with a trailing {@code .csv}, {@code .tsv} or {@code .txt}
 * removed. Header names are registered as property names, then each row is emitted as
 * one entity whose values are set by column index.
 *
 * @param files pairs of (name, file) to load
 * @param importer the receiver of collections, properties and entities
 * @throws InvalidFileException declared by the interface; not thrown directly here
 * @throws IOException if a file cannot be opened, read or closed
 */
@Override
public void loadData(List<Tuple<String, File>> files, Importer importer) throws InvalidFileException, IOException {
  for (Tuple<String, File> file : files) {
    // Open reader and parser per file and release both before moving to the next file.
    try (FileReader reader = new FileReader(file.getRight());
         CSVParser parser = format.parse(reader)) {

      String filename = file.getLeft();
      //remove well-known extensions
      if (filename.endsWith(".csv") || filename.endsWith(".tsv") || filename.endsWith(".txt")) {
        filename = filename.substring(0, filename.length() - 4);
      }
      importer.startCollection(filename);

      parser.getHeaderMap().forEach((name, column) -> importer.registerPropertyName(column, name));

      parser.forEach(row -> {
        importer.startEntity();
        for (int i = 0; i < row.size(); i++) {
          importer.setValue(i, row.get(i));
        }
        importer.finishEntity();
      });

      importer.finishCollection();
    }
  }
}
 
Example #16
Source File: DictionaryConnector.java    From cloud-search-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Gets all the data repository documents.
 *
 * <p>Reads the file at {@code dictionaryFilePath} as RFC 4180 CSV (empty lines and
 * surrounding spaces ignored, {@code #} as comment marker), converts every record with
 * {@code buildDocument} and returns all results in a single iterable.
 *
 * @param checkpoint save state from last iteration (not read by this method)
 * @return an iterable over the operations built from the file
 * @throws RepositoryException with a {@code CLIENT_ERROR} type if the file cannot be read
 */
@Override
public CheckpointCloseableIterable<ApiOperation> getAllDocs(byte[] checkpoint)
    throws RepositoryException {
  log.info("Retrieving all documents.");

  CSVFormat dictionaryFormat = CSVFormat.RFC4180
      .withIgnoreEmptyLines()
      .withIgnoreSurroundingSpaces()
      .withCommentMarker('#');
  try (BufferedReader fileReader = new BufferedReader(new FileReader(dictionaryFilePath));
      CSVParser entries = new CSVParser(fileReader, dictionaryFormat)) {
    // Materialize every operation before the parser is closed.
    List<ApiOperation> operations = StreamSupport.stream(entries.spliterator(), false)
        .map(entry -> buildDocument(entry))
        .collect(Collectors.toList());
    return new CheckpointCloseableIterableImpl.Builder<>(operations).build();
  } catch (IOException readFailure) {
    throw new RepositoryException.Builder()
        .setCause(readFailure)
        .setErrorType(RepositoryException.ErrorType.CLIENT_ERROR)
        .build();
  }
}
 
Example #17
Source File: DataConversionHelper.java    From sagemaker-sparkml-serving-container with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the first line of a CSV payload into a list of typed values.
 * <p>
 * One value is produced per column of {@code schema.getInput()}, converted with
 * {@code convertInputDataToJavaType} using the column's type and the
 * {@code BASIC} structure.
 *
 * @param csvInput the input received from the request in CSV format
 * @param schema the data schema describing the input columns
 * @return List of Objects, where each Object corresponds to one feature of the input data
 * @throws IOException if the CSV text cannot be parsed
 */
public List<Object> convertCsvToObjectList(final String csvInput, final DataSchema schema) throws IOException {
    try (final StringReader input = new StringReader(csvInput)) {
        final List<Object> converted = Lists.newArrayList();
        // Multiple CSV lines are not supported currently: only the first record is used
        final CSVRecord firstLine = CSVFormat.DEFAULT.parse(input).getRecords().get(0);
        final int columnCount = schema.getInput().size();
        int idx = 0;
        while (idx < columnCount) {
            final ColumnSchema column = schema.getInput().get(idx);
            // For CSV input, each value is treated as an individual feature by default
            final Object value =
                    this.convertInputDataToJavaType(column.getType(), DataStructureType.BASIC, firstLine.get(idx));
            converted.add(value);
            idx++;
        }
        return converted;
    }
}
 
Example #18
Source File: DelimitedTextReader.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the file behind the split and prepares the CSV parser and its iterator.
 * Returns without further setup when {@code openFile} yields {@code null}. When no
 * {@code uriName} is set, either an {@code IdGenerator} is created (if URI generation
 * is enabled in the configuration) or {@code uriId} is reset to zero.
 *
 * @param inSplit the input split to read
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException declared for overriding implementations
 */
protected void initParser(InputSplit inSplit) throws IOException,
    InterruptedException {
    fileIn = openFile(inSplit, true);
    if (fileIn == null) {
        return;
    }
    instream = new InputStreamReader(fileIn, encoding);

    bytesRead = 0;
    fileLen = inSplit.getLength();
    if (uriName == null) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (!generateId) {
            uriId = 0;
        } else {
            // Seed the generator with the file path and the split's start offset.
            String idSeed = file.toUri().getPath() + "-"
                + ((FileSplit) inSplit).getStart();
            idGen = new IdGenerator(idSeed);
        }
    }
    parser = new CSVParser(
        instream,
        CSVParserFormatter.getFormat(delimiter, encapsulator, true, true));
    parserIterator = parser.iterator();
}
 
Example #19
Source File: GeographyCode.java    From MyBox with Apache License 2.0 6 votes vote down vote up
/**
 * Reads geography codes from a UTF-8 CSV file whose first record is the header.
 * Values are trimmed and empty strings are read as {@code null}. Records for which
 * {@code GeographyCode.readIntenalRecord} returns {@code null} are skipped.
 * <p>
 * Reading is best-effort: any exception is logged at debug level and the codes
 * collected so far are returned.
 *
 * @param file the CSV file to read
 * @return the codes that could be read; never {@code null}
 */
public static List<GeographyCode> readInternalCSV(File file) {
    // Diamond operator instead of the raw ArrayList type.
    List<GeographyCode> codes = new ArrayList<>();
    try ( CSVParser parser = CSVParser.parse(file, StandardCharsets.UTF_8,
            CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(',').withTrim().withNullString(""))) {
        List<String> names = parser.getHeaderNames();
        for (CSVRecord record : parser) {
            GeographyCode code = GeographyCode.readIntenalRecord(names, record);
            if (code != null) {
                codes.add(code);
            }
        }
    } catch (Exception e) {
        logger.debug(e.toString());
    }
    return codes;
}
 
Example #20
Source File: CsvConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the whole file as UTF-8 CSV, treating the first record as the header. The
 * header names, joined with the format's delimiter, are emitted to {@code headerTag};
 * every data record, joined the same way, is emitted to {@code linesTag}.
 *
 * @param context supplies the file to read
 * @param outputReceiver receives the header line and the record lines
 * @throws RuntimeException if the file cannot be read or parsed; the original
 *     {@link IOException} is kept as the cause
 */
@ProcessElement
public void processElement(ProcessContext context, MultiOutputReceiver outputReceiver) {
  ReadableFile f = context.element();
  String headers;
  List<String> records;
  String delimiter = String.valueOf(this.csvFormat.getDelimiter());
  try (CSVParser parser =
      CSVParser.parse(
          new StringReader(f.readFullyAsUTF8String()),
          this.csvFormat.withFirstRecordAsHeader())) {
    records =
        parser.getRecords().stream()
            .map(i -> String.join(delimiter, i))
            .collect(Collectors.toList());
    headers = String.join(delimiter, parser.getHeaderNames());
  } catch (IOException ioe) {
    LOG.error("Headers do not match, consistency cannot be guaranteed");
    // Keep the cause so the stack trace of the underlying failure is not lost.
    throw new RuntimeException("Could not read Csv headers: " + ioe.getMessage(), ioe);
  }
  outputReceiver.get(this.headerTag).output(headers);
  records.forEach(r -> outputReceiver.get(this.linesTag).output(r));
}
 
Example #21
Source File: CSVHeaderSchemaStrategy.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a record schema from the header line of the CSV content: every header
 * name becomes a nullable field of the STRING data type.
 *
 * @param variables values used when creating the CSV format
 * @param contentStream the CSV content; wrapped in a {@code BOMInputStream} before reading
 * @param readSchema not read by this method
 * @return the schema built from the header names
 * @throws SchemaNotFoundException if no context is available or the header cannot be read
 */
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream, final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException("Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        final CSVFormat headerFormat = CSVUtils.createCSVFormat(context, variables).withFirstRecordAsHeader();
        try (final Reader contentReader = new InputStreamReader(new BOMInputStream(contentStream));
            final CSVParser headerParser = new CSVParser(contentReader, headerFormat)) {

            final List<RecordField> schemaFields = new ArrayList<>();
            headerParser.getHeaderMap().keySet().forEach(
                columnName -> schemaFields.add(new RecordField(columnName, RecordFieldType.STRING.getDataType(), true)));

            return new SimpleRecordSchema(schemaFields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}
 
Example #22
Source File: CSVRecordFactory.java    From rmlmapper-java with MIT License 6 votes vote down vote up
/**
 * This method returns a CSVParser from a simple access (local/remote CSV file; no CSVW).
 * The first record is read as the header and is not skipped as a data record.
 *
 * @param access the used access.
 * @return a CSVParser, or {@code null} when the access provides no input stream or the
 *     parser rejects the input with an {@link IllegalArgumentException}.
 * @throws IOException if the stream cannot be obtained or read
 * @throws SQLException declared by {@code access.getInputStream()}
 * @throws ClassNotFoundException declared by {@code access.getInputStream()}
 */
private CSVParser getParserForNormalCSV(Access access) throws IOException, SQLException, ClassNotFoundException {
    CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord(false);
    InputStream inputStream = access.getInputStream();

    if (inputStream == null) {
        return null;
    }

    try {
        return CSVParser.parse(inputStream, StandardCharsets.UTF_8, csvFormat);
    } catch (IllegalArgumentException e) {
        // TODO give warning to user
        // No parser will own the stream on this path, so release it here.
        try {
            inputStream.close();
        } catch (IOException ignored) {
            // Best effort: the caller is told "no parser" by the null return either way.
        }
        return null;
    }
}
 
Example #23
Source File: ImportFileHelper.java    From PolyGlot with MIT License 6 votes vote down vote up
/**
 * Reads every record of the given CSV file and returns the values of each record
 * after passing them through {@code deNullRow}.
 *
 * @param inputFile path of file to read
 * @param format CSV format used to parse the file
 * @return List of rows
 * @throws FileNotFoundException if CSV does not exist
 * @throws IOException if read error
 * @throws MalformedInputException declared for input that cannot be decoded
 */
private List<List<String>> getRows(String inputFile, CSVFormat format) throws FileNotFoundException, IOException, MalformedInputException {
    List<List<String>> rows = new ArrayList<>();

    try (
        Reader fileReader = Files.newBufferedReader(Paths.get(inputFile));
        CSVParser records = new CSVParser(fileReader, format)
    ) {
        for (CSVRecord record : records) {
            List<String> values = new ArrayList<>();
            for (String value : record) {
                values.add(value);
            }
            rows.add(deNullRow(values));
        }
    }

    return rows;
}
 
Example #24
Source File: CsvFileReader.java    From neodymium-library with MIT License 6 votes vote down vote up
/**
 * Parses the stream with {@code CSV_FORMAT} and {@code CHARSET_UTF8} and returns each
 * record as a map produced by {@code CSVRecord.toMap()}, in file order.
 *
 * @param inputStream the CSV content to read
 * @return one map per record
 * @throws RuntimeException wrapping any {@link IOException} raised while parsing
 */
public static List<Map<String, String>> readFile(InputStream inputStream)
{
    final List<CSVRecord> records;
    try
    {
        records = CSVParser.parse(inputStream, CHARSET_UTF8, CSV_FORMAT).getRecords();
    }
    catch (IOException e)
    {
        throw new RuntimeException(e);
    }

    List<Map<String, String>> data = new LinkedList<>();
    for (CSVRecord record : records)
    {
        data.add(record.toMap());
    }
    return data;
}
 
Example #25
Source File: PlatformDataExportServiceIT.java    From find with MIT License 6 votes vote down vote up
/**
 * Exports query results as CSV and checks the shape of the output: a header record
 * whose first two columns are "Reference" and "Database", followed by a data record
 * with non-blank values in both columns.
 *
 * @throws E if the export service fails
 * @throws IOException if the CSV output cannot be parsed
 */
@Test
public void exportToCsv() throws E, IOException {
    final R queryRequest = queryRequestBuilderFactory.getObject()
            .queryRestrictions(testUtils.buildQueryRestrictions())
            .queryType(QueryRequest.QueryType.MODIFIED)
            .build();

    final ByteArrayOutputStream exportBuffer = new ByteArrayOutputStream();
    exportService.exportQueryResults(exportBuffer, queryRequest, ExportFormat.CSV, Collections.emptyList(), 1001L);
    final String output = exportBuffer.toString();
    assertNotNull(output);

    try (final CSVParser csvParser = CSVParser.parse(output, CSVFormat.EXCEL)) {
        final List<CSVRecord> rows = csvParser.getRecords();
        assertThat(rows, not(empty()));

        final CSVRecord header = rows.get(0);
        assertThat(header.get(0), endsWith("Reference")); // byte-order mark may get in the way
        assertEquals("Database", header.get(1));

        final CSVRecord firstData = rows.get(1);
        final String reference = firstData.get(0);
        assertNotNull(reference);
        assertFalse(reference.trim().isEmpty());
        final String database = firstData.get(1);
        assertFalse(database.trim().isEmpty());
    }
}
 
Example #26
Source File: CredentialReportCSVParserImpl.java    From fullstop with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a credential report into report entries. The report format must equal
 * {@code Textcsv}, and the CSV header must contain all required columns; each record
 * is converted with {@code toCSVReportEntry} and {@code null} results are dropped.
 *
 * @param report the credential report to parse
 * @return the non-null entries, in record order
 * @throws RuntimeException if the report content cannot be read
 */
@Override
public List<CSVReportEntry> apply(final GetCredentialReportResult report) {
    Assert.state(Textcsv.toString().equals(report.getReportFormat()), "unknown credential report format: " + report.getReportFormat());

    try (final Reader r = new BufferedReader(new InputStreamReader(new ByteBufferBackedInputStream(report.getContent())))) {
        final CSVParser parser = new CSVParser(r, CSV_FORMAT);
        final Map<String, Integer> headers = parser.getHeaderMap();

        // Checked in this order so the first missing column is the one reported.
        final String[] requiredHeaders = {
                "user",
                "arn",
                "password_enabled",
                "mfa_active",
                "access_key_1_active",
                "access_key_2_active"
        };
        for (final String required : requiredHeaders) {
            Assert.state(headers.containsKey(required), "Header '" + required + "' not found in CSV");
        }

        return stream(parser.spliterator(), false)
                .map(this::toCSVReportEntry)
                .filter(Objects::nonNull)
                .collect(toList());
    } catch (final IOException e) {
        throw new RuntimeException("Could not read csv report", e);
    }
}
 
Example #27
Source File: CsvReaderDataSource.java    From obevo with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the reader once and reads the first row as the field names, so the file
 * fields are known before any records are read. Depending on {@code csvVersion}, the
 * Commons CSV parser or the opencsv reader is used. The names are then mapped through
 * {@code convertDbObjectName}. Later calls are no-ops.
 *
 * @throws DeployerRuntimeException wrapping any exception raised while reading
 */
public void init() {
    if (this.initialized) {
        return;
    }

    try {
        MutableList<String> headerFields;
        if (csvVersion != CsvStaticDataReader.CSV_V2) {
            this.csvreaderV1 = new au.com.bytecode.opencsv.CSVReader(this.reader, this.delim);
            headerFields = ArrayAdapter.adapt(this.csvreaderV1.readNext());
        } else {
            CSVFormat csvFormat = CsvStaticDataReader.getCsvFormat(delim, nullToken);
            this.csvreaderV2 = new CSVParser(reader, csvFormat);
            this.iteratorV2 = csvreaderV2.iterator();
            // The first record consumed from the iterator holds the field names.
            headerFields = ListAdapter.adapt(IteratorUtils.toList(iteratorV2.next().iterator()));
        }

        this.fields = headerFields.collect(this.convertDbObjectName);
    } catch (Exception e) {
        throw new DeployerRuntimeException(e);
    }
    this.initialized = true;
}
 
Example #28
Source File: UserCSVUploadPost.java    From alfresco-remote-api with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Parses an uploaded CSV (Excel format, UTF-8) into a two-dimensional string array and
 * hands it to {@code processSpreadsheetUpload} when it contains at least one record.
 *
 * @param input the uploaded CSV content
 * @param users passed through to {@code processSpreadsheetUpload}
 * @throws IOException if the content cannot be read or parsed
 */
protected void processCSVUpload(InputStream input, List<Map<QName,String>> users)
    throws IOException
{
    InputStreamReader utf8Reader = new InputStreamReader(input, Charset.forName("UTF-8"));
    CSVParser parsed = CSVFormat.EXCEL.parse(utf8Reader);

    String[][] rows = parsed.getRecords().stream()
        .map(record -> {
            List<String> cells = new ArrayList<>();
            for (String cell : record)
            {
                cells.add(cell);
            }
            return cells.toArray(String[]::new);
        }).toArray(String[][]::new);

    if (rows.length > 0)
    {
        processSpreadsheetUpload(rows, users);
    }
}
 
Example #29
Source File: DataProcessorUtil.java    From Insights with Apache License 2.0 6 votes vote down vote up
/**
 * Converts every CSV record into a JSON node, stamps it with a metadata id and a
 * creation date, and bulk-creates all nodes with the given query.
 *
 * @param status the value returned when the database response reports errors
 * @param csvParser the parser supplying the records
 * @param dbHandler the database handler used for the bulk create
 * @param headerMap header name to column index mapping
 * @param query the query passed to the bulk create
 * @return {@code true} when the response reports no errors, otherwise {@code status}
 * @throws IOException if the records cannot be read
 * @throws GraphDBException declared for failures raised by the database calls
 * @throws InsightsCustomException declared for failures raised by the helper methods
 */
private boolean parseCsvRecords(boolean status, CSVParser csvParser, Neo4jDBHandler dbHandler,
		Map<String, Integer> headerMap, String query)
		throws IOException, GraphDBException, InsightsCustomException {
	List<JsonObject> nodeProperties = new ArrayList<>();
	List<String> combo = new ArrayList<>();
	getCurrentRecords(combo, dbHandler);
	int recordNumber = 0;
	for (CSVRecord csvRecord : csvParser.getRecords()) {
		JsonObject node = getHierachyDetails(csvRecord, headerMap);
		recordNumber++;
		// The id is the current nanosecond-of-second offset by the 1-based record number.
		node.addProperty(DatataggingConstants.METADATA_ID, Instant.now().getNano() + recordNumber);
		node.addProperty(DatataggingConstants.CREATIONDATE, Instant.now().toEpochMilli());
		nodeProperties.add(node);
		updateComboList(combo, node);
	}
	JsonObject graphResponse = dbHandler.bulkCreateNodes(nodeProperties, null, query);
	int errorCount = graphResponse.get(DatataggingConstants.RESPONSE).getAsJsonObject()
			.get(DatataggingConstants.ERRORS).getAsJsonArray().size();
	if (errorCount <= 0) {
		return true;
	}

	log.error(graphResponse);
	return status;
}
Example #30
Source File: BulkUploadService.java    From Insights with Apache License 2.0 6 votes vote down vote up
/**
 * Converts every CSV record with {@code getCSVRecordDetails} and stores the results in
 * the database with a single UNWIND/CREATE query labelled with the upper-cased
 * {@code label}.
 *
 * @param csvParser the parser supplying the header map and the records
 * @param label the node label; upper-cased and concatenated into the query
 * @param insightsTimeField the header that must be present in the file
 * @param insightsTimeFormat passed through to {@code getCSVRecordDetails}
 * @return {@code true} when the records were stored
 * @throws InsightsCustomException if the header contains an empty name, the time field
 *         is missing, or any other failure occurs while parsing or storing
 */
private boolean parseCsvRecords(CSVParser csvParser, String label, String insightsTimeField,
		String insightsTimeFormat) throws InsightsCustomException {
	List<JsonObject> nodeProperties = new ArrayList<>();
	// NOTE(review): label is concatenated into the query text; confirm callers only
	// pass trusted label values.
	String query = "UNWIND {props} AS properties CREATE (n:" + label.toUpperCase()
			+ ") SET n = properties";
	Map<String, Integer> headerMap = csvParser.getHeaderMap();
	try {
		if (headerMap.containsKey("")) {
			throw new InsightsCustomException("Error in file.");
		}
		if (!headerMap.containsKey(insightsTimeField)) {
			throw new InsightsCustomException("Insights Time Field not present in csv file");
		}
		for (CSVRecord csvRecord : csvParser.getRecords()) {
			nodeProperties.add(getCSVRecordDetails(csvRecord, headerMap, insightsTimeField, insightsTimeFormat));
		}
		insertDataInDatabase(nodeProperties, query);
		return true;
	} catch (Exception ex) {
		// Validation failures thrown above also land here and are re-wrapped.
		log.error("Error while parsing the .CSV records. {} ", ex.getMessage());
		throw new InsightsCustomException(ex.getMessage());
	}
}