Java Code Examples for org.apache.commons.csv.CSVParser

The following examples show how to use org.apache.commons.csv.CSVParser. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hmftools   Source File: TreatmentCurator.java    License: GNU General Public License v3.0 8 votes vote down vote up
/**
 * Reads drug entries from a CSV mapping stream whose first record is the header.
 *
 * <p>The name, type, synonyms and mechanism cells are trimmed. The synonyms cell is itself
 * parsed as CSV so that one cell can carry several synonyms.
 *
 * @param mappingInputStream stream holding the mapping CSV; closing the parser also closes it
 * @return one entry per CSV record, in the order the records were read
 * @throws IOException if the stream cannot be read or parsed
 */
@NotNull
private static List<DrugEntry> readEntries(@NotNull InputStream mappingInputStream) throws IOException {
    List<DrugEntry> drugEntries = Lists.newArrayList();
    try (CSVParser parser = CSVParser.parse(mappingInputStream, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
        for (CSVRecord record : parser) {
            String canonicalName = record.get(DRUG_NAME_CSV_FIELD).trim();
            String drugType = record.get(DRUG_TYPE_CSV_FIELD).trim();
            String synonymsField = record.get(DRUG_SYNONYMS_CSV_FIELD).trim();
            // Trim the cell value, like the other columns, rather than the column-name constant.
            String treatmentMechanism = record.get(DRUG_MECHANISM_CSV_FILE).trim();

            List<String> synonyms = Lists.newArrayList();
            if (!synonymsField.isEmpty()) {
                // The synonyms cell is a nested CSV list; every value of every nested record counts.
                try (CSVParser synonymsParser = CSVParser.parse(synonymsField, CSVFormat.DEFAULT)) {
                    for (CSVRecord synonymsRecord : synonymsParser) {
                        for (String synonym : synonymsRecord) {
                            synonyms.add(synonym.trim());
                        }
                    }
                }
            }
            drugEntries.add(ImmutableDrugEntry.of(canonicalName, synonyms, drugType, treatmentMechanism));
        }
    }
    return drugEntries;
}
 
Example 2
Source Project: djl-demo   Source File: CSVDataset.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the records of {@code dataset/malicious_url_data.csv} into {@code dataset} and returns a
 * new {@link CSVDataset} constructed from this instance. The file is downloaded first when it is
 * not present locally.
 *
 * @return a new {@link CSVDataset} constructed from this instance
 * @throws IOException if the download fails or the CSV file cannot be read
 */
CSVDataset build() throws IOException {
    Path path = Paths.get("dataset");
    Files.createDirectories(path);
    Path csvFile = path.resolve("malicious_url_data.csv");
    if (!Files.exists(csvFile)) {
        logger.info("Downloading dataset file ...");
        URL url =
                new URL(
                        "https://raw.githubusercontent.com/incertum/cyber-matrix-ai/master/Malicious-URL-Detection-Deep-Learning/data/url_data_mega_deep_learning.csv");
        // Close the download stream once the copy finishes (or fails) so the connection is released.
        try (java.io.InputStream download = url.openStream()) {
            Files.copy(download, csvFile);
        }
    }

    try (Reader reader = Files.newBufferedReader(csvFile);
            CSVParser csvParser =
                    new CSVParser(
                            reader,
                            // NOTE(review): withFirstRecordAsHeader() is applied after withHeader(...),
                            // so the explicit names are presumably overridden - confirm.
                            CSVFormat.DEFAULT
                                    .withHeader("url", "isMalicious")
                                    .withFirstRecordAsHeader()
                                    .withIgnoreHeaderCase()
                                    .withTrim())) {
        dataset = csvParser.getRecords();
        return new CSVDataset(this);
    }
}
 
Example 3
Source Project: constellation   Source File: RecordStoreUtilities.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link RecordStore} from CSV text read off an {@link InputStream}.
 * The first CSV record supplies the keys; every later record is added to the
 * store as one entry whose values are set under those keys by column
 * position.
 *
 * @param in The {@link InputStream} to read; it is decoded as UTF-8.
 * @return The {@link RecordStore} filled from the stream.
 * @throws IOException If reading or parsing the stream fails.
 */
public static RecordStore fromCsv(final InputStream in) throws IOException {
    final RecordStore store = new GraphRecordStore();

    try (final CSVParser parser = CSVFormat.DEFAULT.parse(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        final List<CSVRecord> rows = parser.getRecords();
        if (!rows.isEmpty()) {
            final CSVRecord heading = rows.get(0);
            // Everything after the heading row is data.
            for (final CSVRecord row : rows.subList(1, rows.size())) {
                store.add();
                int column = 0;
                for (final String value : row) {
                    store.set(heading.get(column), value);
                    column++;
                }
            }
        }
    }

    return store;
}
 
Example 4
Source Project: constellation   Source File: HashmodCSVImportFileParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the leading rows of the input as string arrays, for previewing.
 *
 * <p>The header row is kept as the first entry. Rows are parsed on demand, so input beyond the
 * previewed rows is not parsed. Collection stops once {@code limit} rows have been gathered; the
 * check runs after a row is added, so a non-empty input yields at least one row even when
 * {@code limit} is below one.
 *
 * @param input source providing the CSV input stream, decoded as UTF-8
 * @param parameters plugin parameters; not used by this method
 * @param limit number of rows after which collection stops
 * @return the collected rows, one {@code String[]} of cell values per row
 * @throws IOException if the stream cannot be read or parsed
 */
public List<String[]> preview(final HashmodInputSource input, final PluginParameters parameters, final int limit) throws IOException {
    // Leave the header on, as the importer expects this as the first entry.
    final ArrayList<String[]> results = new ArrayList<>();
    try (final CSVParser csvFileParser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8.name()))) {
        // Iterate the parser directly instead of getRecords() so a large input is not fully
        // materialised just to show a few rows.
        for (final CSVRecord record : csvFileParser) {
            final String[] line = new String[record.size()];
            for (int i = 0; i < record.size(); i++) {
                line[i] = record.get(i);
            }
            results.add(line);
            if (results.size() >= limit) {
                break;
            }
        }
    } catch (final RuntimeException e) {
        // The parser's iterator reports read failures as unchecked exceptions; surface the
        // underlying IOException so callers keep seeing the declared checked exception.
        if (e.getCause() instanceof IOException) {
            throw (IOException) e.getCause();
        }
        throw e;
    }
    return results;
}
 
Example 5
Source Project: LicenseScout   Source File: LicenseStoreData.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads license URL mappings from comma-separated text in which {@code #} marks comments.
 * The first value of each record is the URL; every following value is treated as an SPDX
 * license identifier. Identifiers that cannot be resolved are logged and skipped.
 * 
 * @param inputStream an input stream to read the file contents from, decoded as UTF-8
 * @param log the logger that receives a message for each unknown identifier
 * @throws IOException if reading or parsing the stream fails
 */
public void readUrlMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat mappingFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        for (final CSVRecord row : mappingFormat.parse(reader)) {
            final String url = row.get(0).trim();
            final List<License> resolved = new ArrayList<>();
            // Column 0 is the URL; the license identifiers start at column 1.
            for (int column = 1; column < row.size(); column++) {
                final String identifier = row.get(column).trim();
                final License license = getLicenseBySpdxIdentifier(identifier);
                if (license == null) {
                    log.info("readUrlMappings: license identifier not found: " + identifier);
                    continue;
                }
                resolved.add(license);
            }
            urlMappings.put(url, resolved);
        }
    }
}
 
Example 6
Source Project: LicenseScout   Source File: LicenseStoreData.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads license name mappings from comma-separated text in which {@code #} marks comments.
 * The first value of each record is the mapped name; every following value is treated as an
 * SPDX license identifier. Identifiers that cannot be resolved are logged and skipped.
 * 
 * @param inputStream an input stream to read the file contents from, decoded as UTF-8
 * @param log the logger that receives a message for each unknown identifier
 * @throws IOException if reading or parsing the stream fails
 */
public void readNameMappings(final InputStream inputStream, final ILSLog log) throws IOException {
    final CSVFormat mappingFormat = CSVFormat.DEFAULT.withDelimiter(',').withCommentMarker('#');
    try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
        for (final CSVRecord row : mappingFormat.parse(reader)) {
            final String mappedName = row.get(0).trim();
            final List<License> resolved = new ArrayList<>();
            // Column 0 is the name; the license identifiers start at column 1.
            for (int column = 1; column < row.size(); column++) {
                final String identifier = row.get(column).trim();
                final License license = getLicenseBySpdxIdentifier(identifier);
                if (license == null) {
                    log.info("readNameMappings: license identifier not found: " + identifier);
                    continue;
                }
                resolved.add(license);
            }
            nameMappings.put(mappedName, resolved);
        }
    }
}
 
Example 7
Source Project: tabula-java   Source File: TestSpreadsheetExtractor.java    License: MIT License 6 votes vote down vote up
/**
 * Builds cells from the four float columns of the CELLS fixture CSV and checks that
 * {@code findSpreadsheetsFromCells} yields the expected rectangles once both lists are sorted
 * with the same comparator.
 */
@Test
public void testFindSpreadsheetsFromCells() throws IOException {

    List<Cell> cells = new ArrayList<>();

    // try-with-resources releases the fixture file handle even when a row fails to parse.
    try (CSVParser parse = CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"),
            Charset.forName("utf-8"),
            CSVFormat.DEFAULT)) {
        for (CSVRecord record : parse) {
            cells.add(new Cell(Float.parseFloat(record.get(0)),
                    Float.parseFloat(record.get(1)),
                    Float.parseFloat(record.get(2)),
                    Float.parseFloat(record.get(3))));
        }
    }


    // Arrays.asList is a view, so this sort also reorders EXPECTED_RECTANGLES itself.
    List<Rectangle> expected = Arrays.asList(EXPECTED_RECTANGLES);
    Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER);
    List<Rectangle> foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
    Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER);
    assertTrue(foundRectangles.equals(expected));
}
 
Example 8
Source Project: timbuctoo   Source File: CsvLoader.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Feeds each given file to the importer as one collection: the parser's header map is registered
 * as property names, then every row is emitted as an entity with one value per column index.
 *
 * @param files pairs of display name and file; a trailing .csv, .tsv or .txt is stripped from
 *              the name before it is used as the collection name
 * @param importer receiver of the collection, property and entity callbacks
 * @throws InvalidFileException declared by the interface; not thrown directly by this method
 * @throws IOException if a file cannot be opened or parsed
 */
@Override
public void loadData(List<Tuple<String, File>> files, Importer importer) throws InvalidFileException, IOException {
  for (Tuple<String, File> file : files) {
    // Reader and parser are closed per file so handles do not accumulate across the loop.
    // NOTE(review): FileReader decodes with the platform default charset - confirm that is intended.
    try (FileReader reader = new FileReader(file.getRight());
         CSVParser parser = format.parse(reader)) {

      String filename = file.getLeft();
      //remove well-known extensions
      if (filename.endsWith(".csv") || filename.endsWith(".tsv") || filename.endsWith(".txt")) {
        filename = filename.substring(0, filename.length() - 4);
      }
      importer.startCollection(filename);

      parser.getHeaderMap().forEach((name, column) -> importer.registerPropertyName(column, name));

      parser.forEach(row -> {
        importer.startEntity();
        for (int i = 0; i < row.size(); i++) {
          importer.setValue(i, row.get(i));
        }
        importer.finishEntity();
      });

      importer.finishCollection();
    }
  }
}
 
Example 9
Source Project: cloud-search-samples   Source File: DictionaryConnector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Gets all the data repository documents.
 *
 * This is the full-traversal entry point of the {@link Repository}
 * implementation. The file at {@code dictionaryFilePath} is parsed as
 * RFC 4180 CSV, with empty lines ignored, surrounding spaces ignored and
 * {@code #} as the comment marker. Every record is converted with
 * {@code buildDocument} and the whole list is returned at once.
 *
 * @param checkpoint save state from last iteration; not used by this method
 * @return An iterable over the operations built from the file's records
 * @throws RepositoryException with error type {@code CLIENT_ERROR} if the
 *     file cannot be read
 */
@Override
public CheckpointCloseableIterable<ApiOperation> getAllDocs(byte[] checkpoint)
    throws RepositoryException {
  log.info("Retrieving all documents.");

  CSVFormat dictionaryFormat = CSVFormat.RFC4180
      .withCommentMarker('#')
      .withIgnoreSurroundingSpaces()
      .withIgnoreEmptyLines();
  try (BufferedReader fileReader = new BufferedReader(new FileReader(dictionaryFilePath));
      CSVParser records = new CSVParser(fileReader, dictionaryFormat)) {
    List<ApiOperation> operations = StreamSupport.stream(records.spliterator(), false)
        .map(record -> buildDocument(record))
        .collect(Collectors.toList());
    return new CheckpointCloseableIterableImpl.Builder<>(operations).build();
  } catch (IOException readFailure) {
    RepositoryException.Builder failure = new RepositoryException.Builder();
    failure.setCause(readFailure);
    failure.setErrorType(RepositoryException.ErrorType.CLIENT_ERROR);
    throw failure.build();
  }
}
 
Example 10
/**
 * Parses the input payload in CSV format to a list of Objects.
 *
 * <p>Only the first CSV record is converted; multiple CSV lines are not supported as input
 * currently. One value is produced per input column of the schema, read from the record at the
 * same index.
 *
 * @param csvInput the input received from the request in CSV format
 * @param schema the data schema whose input columns give the type of each value
 * @return List of Objects, where each Object corresponds to one feature of the input data
 * @throws IOException if the CSV text cannot be parsed
 * @throws IndexOutOfBoundsException if the input contains no CSV record
 */
public List<Object> convertCsvToObjectList(final String csvInput, final DataSchema schema) throws IOException {
    try (final StringReader sr = new StringReader(csvInput);
            final CSVParser parser = CSVFormat.DEFAULT.parse(sr)) {
        final List<CSVRecord> records = parser.getRecords();
        if (records.isEmpty()) {
            // Same exception type as an unchecked get(0) would raise, but with a message that
            // names the actual problem.
            throw new IndexOutOfBoundsException("CSV input contains no records");
        }
        // We do not support multiple CSV lines as input currently
        final CSVRecord record = records.get(0);
        final List<Object> valueList = Lists.newArrayList();
        final int inputLength = schema.getInput().size();
        for (int idx = 0; idx < inputLength; ++idx) {
            ColumnSchema sc = schema.getInput().get(idx);
            // For CSV input, each value is treated as an individual feature by default
            valueList.add(this.convertInputDataToJavaType(sc.getType(), DataStructureType.BASIC, record.get(idx)));
        }
        return valueList;
    }
}
 
Example 11
/**
 * Splits a comma-separated value into an array of trimmed strings.
 *
 * <p>The value is parsed as CSV and only its first record is used. When that record has at most
 * {@code len} fields the result has exactly {@code len} slots, with unused trailing slots left
 * {@code null}. When it has more than {@code len} fields the value is instead split with
 * {@code String.split(",", len)}, so the remainder stays in the last element. Input without any
 * CSV record yields an array of {@code len} {@code null} slots.
 *
 * @param val the text to split
 * @param len the number of slots in the result (the split limit in the fallback case)
 * @return the trimmed values
 * @throws IOException if the CSV text cannot be parsed
 */
public static String[] splitStr(String val, Integer len) throws IOException {

    final CSVRecord[] records;
    // try-with-resources closes the parser on every path, including the fallback below.
    try (CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT)) {
        records = parser.getRecords().toArray(new CSVRecord[0]);
    }

    String[] input = new String[len];
    if (records.length == 0) {
        return input;
    }

    CSVRecord record = records[0];
    if (record.size() > len) {
        // More fields than slots: use a plain split so the overflow stays in the last element.
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++) {
            input[i] = input[i].trim();
        }
        return input;
    }

    int i = 0;
    Iterator<String> valuesIt = record.iterator();
    while (valuesIt.hasNext()) {
        input[i] = valuesIt.next().trim();
        i++;
    }
    return input;
}
 
Example 12
Source Project: MyBox   Source File: GeographyCode.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads geography codes from a UTF-8, comma-delimited CSV file whose first record is the header.
 * Values are trimmed and empty strings are read as {@code null}.
 *
 * <p>This is best effort: any exception is logged at debug level and the codes collected up to
 * that point are returned.
 *
 * @param file the CSV file to read
 * @return the codes that could be read; records for which no code is produced are skipped
 */
public static List<GeographyCode> readInternalCSV(File file) {
    // Diamond instead of the raw type, so the list is type-checked.
    List<GeographyCode> codes = new ArrayList<>();
    try ( CSVParser parser = CSVParser.parse(file, StandardCharsets.UTF_8,
            CSVFormat.DEFAULT.withFirstRecordAsHeader().withDelimiter(',').withTrim().withNullString(""))) {
        List<String> names = parser.getHeaderNames();
        for (CSVRecord record : parser) {
            GeographyCode code = GeographyCode.readIntenalRecord(names, record);
            if (code != null) {
                codes.add(code);
            }
        }
    } catch (Exception e) {
        logger.debug(e.toString());
    }
    return codes;
}
 
Example 13
Source Project: webtau   Source File: CsvUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens a parser over CSV text using RFC 4180 rules with a comma delimiter, trimming values and
 * ignoring surrounding spaces and empty lines. When no header names are supplied, the first
 * record of the content is used as the header.
 *
 * @param header explicit header names; only checked for emptiness here
 * @param content CSV text to parse
 * @return a parser positioned over the content
 * @throws RuntimeException wrapping the {@link IOException} if the parser cannot be created
 */
private static CSVParser readCsvRecords(List<String> header, String content) {
    try {
        final CSVFormat base = header.isEmpty()
                ? CSVFormat.RFC4180.withFirstRecordAsHeader()
                : CSVFormat.RFC4180;

        final CSVFormat format = base
                .withIgnoreSurroundingSpaces()
                .withIgnoreEmptyLines()
                .withTrim()
                .withDelimiter(',');

        return format.parse(new StringReader(content));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example 14
Source Project: DataflowTemplates   Source File: CsvConverters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the whole file as UTF-8 CSV, treating the first record as the header, and emits the
 * header line to {@code headerTag} and every remaining record to {@code linesTag}. Header names
 * and record values are re-joined with the configured delimiter.
 *
 * @param context supplies the file to read
 * @param outputReceiver receives the header line and the record lines
 * @throws RuntimeException if the file cannot be read or parsed; the {@link IOException} is
 *     attached as the cause
 */
@ProcessElement
public void processElement(ProcessContext context, MultiOutputReceiver outputReceiver) {
  ReadableFile f = context.element();
  String headers;
  List<String> records;
  String delimiter = String.valueOf(this.csvFormat.getDelimiter());
  try (CSVParser parser =
      CSVParser.parse(
          new StringReader(f.readFullyAsUTF8String()), this.csvFormat.withFirstRecordAsHeader())) {
    records =
        parser.getRecords().stream()
            .map(i -> String.join(delimiter, i))
            .collect(Collectors.toList());
    headers = String.join(delimiter, parser.getHeaderNames());
  } catch (IOException ioe) {
    LOG.error("Headers do not match, consistency cannot be guaranteed");
    // Keep the original exception as the cause so the stack trace is not lost.
    throw new RuntimeException("Could not read Csv headers: " + ioe.getMessage(), ioe);
  }
  outputReceiver.get(this.headerTag).output(headers);
  records.forEach(r -> outputReceiver.get(this.linesTag).output(r));
}
 
Example 15
Source Project: rmlmapper-java   Source File: CSVRecordFactory.java    License: MIT License 6 votes vote down vote up
/**
 * This method returns a CSVParser from a simple access (local/remote CSV file; no CSVW).
 * The stream is decoded as UTF-8 and the header is taken from the first record.
 *
 * @param access the used access.
 * @return a CSVParser, or {@code null} when the access provides no input stream or the parser
 *     rejects the input with an {@link IllegalArgumentException}.
 * @throws IOException if the stream cannot be read, or cannot be closed after a rejected parse.
 * @throws SQLException declared for the access; not thrown directly by this method.
 * @throws ClassNotFoundException declared for the access; not thrown directly by this method.
 */
private CSVParser getParserForNormalCSV(Access access) throws IOException, SQLException, ClassNotFoundException {
    CSVFormat csvFormat = CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord(false);
    InputStream inputStream = access.getInputStream();

    if (inputStream == null) {
        return null;
    }

    try {
        return CSVParser.parse(inputStream, StandardCharsets.UTF_8, csvFormat);
    } catch (IllegalArgumentException e) {
        // TODO give warning to user
        // No parser is handed back to own the stream, so release it here.
        inputStream.close();
        return null;
    }
}
 
Example 16
Source Project: PolyGlot   Source File: ImportFileHelper.java    License: MIT License 6 votes vote down vote up
/**
 * Reads every record of a CSV file and returns the values of each as a list of strings, after
 * passing each row through {@code deNullRow}.
 *
 * @param inputFile path of file to read
 * @param format CSV format used to parse the file
 * @return List of rows, in file order
 * @throws FileNotFoundException if CSV does not exist
 * @throws IOException if read error
 * @throws MalformedInputException declared for undecodable file content
 */
private List<List<String>> getRows(String inputFile, CSVFormat format) throws FileNotFoundException, IOException, MalformedInputException {
    List<List<String>> rows = new ArrayList<>();

    try (
        Reader fileReader = Files.newBufferedReader(Paths.get(inputFile));
        CSVParser records = new CSVParser(fileReader, format)
    ) {
        for (CSVRecord record : records) {
            List<String> values = new ArrayList<>();
            // A record iterates over its values in column order.
            for (String value : record) {
                values.add(value);
            }
            rows.add(deNullRow(values));
        }
    }

    return rows;
}
 
Example 17
Source Project: neodymium-library   Source File: CsvFileReader.java    License: MIT License 6 votes vote down vote up
/**
 * Parses the stream with {@code CHARSET_UTF8} and {@code CSV_FORMAT} and returns each record as a
 * map, as produced by the record's {@code toMap()}.
 *
 * @param inputStream
 *            the CSV content to read
 * @return one map per record, in the order the records were read
 * @throws RuntimeException
 *             wrapping the {@link IOException} if the stream cannot be read or parsed
 */
public static List<Map<String, String>> readFile(InputStream inputStream)
{
    final List<Map<String, String>> rows = new LinkedList<>();
    try
    {
        CSVParser.parse(inputStream, CHARSET_UTF8, CSV_FORMAT)
                 .getRecords()
                 .forEach(record -> rows.add(record.toMap()));
    }
    catch (IOException e)
    {
        throw new RuntimeException(e);
    }

    return rows;
}
 
Example 18
Source Project: obevo   Source File: CsvReaderDataSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the CSV reader and takes the first row as the field names, so the file's fields are
 * known before any data row is read. Runs only once; later calls return immediately.
 * The reader implementation is chosen by {@code csvVersion}, and every field name is passed
 * through {@code convertDbObjectName}.
 *
 * @throws DeployerRuntimeException wrapping any exception raised while opening or reading
 */
public void init() {
    if (this.initialized) {
        return;
    }

    try {
        final MutableList<String> headerFields;
        if (csvVersion != CsvStaticDataReader.CSV_V2) {
            this.csvreaderV1 = new au.com.bytecode.opencsv.CSVReader(this.reader, this.delim);
            headerFields = ArrayAdapter.adapt(this.csvreaderV1.readNext());
        } else {
            CSVFormat v2Format = CsvStaticDataReader.getCsvFormat(delim, nullToken);
            this.csvreaderV2 = new CSVParser(reader, v2Format);
            this.iteratorV2 = csvreaderV2.iterator();
            // The first record is consumed here, so the shared iterator continues at the data.
            headerFields = ListAdapter.adapt(IteratorUtils.toList(iteratorV2.next().iterator()));
        }

        this.fields = headerFields.collect(this.convertDbObjectName);
    } catch (Exception e) {
        throw new DeployerRuntimeException(e);
    }
    this.initialized = true;
}
 
Example 19
/**
 * Parses an uploaded CSV stream (UTF-8, Excel CSV format) into a two-dimensional array of cell
 * values and hands it to {@code processSpreadsheetUpload} when at least one record was read.
 *
 * @param input the uploaded CSV content
 * @param users passed through to {@code processSpreadsheetUpload}
 * @throws IOException if the stream cannot be read or parsed
 */
protected void processCSVUpload(InputStream input, List<Map<QName,String>> users)
    throws IOException
{
    CSVParser csv = CSVFormat.EXCEL.parse(new InputStreamReader(input, Charset.forName("UTF-8")));

    String[][] rows = csv.getRecords().stream()
        .map(record -> {
            // Copy the record's values by index into a plain array.
            String[] cells = new String[record.size()];
            for (int column = 0; column < cells.length; column++)
            {
                cells[column] = record.get(column);
            }
            return cells;
        })
        .toArray(String[][]::new);

    if (rows.length == 0)
    {
        return;
    }
    processSpreadsheetUpload(rows, users);
}
 
Example 20
Source Project: Insights   Source File: BulkUploadService.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts every CSV record with {@code getCSVRecordDetails} and passes the results to
 * {@code insertDataInDatabase} together with a query that creates one node per record under the
 * upper-cased label.
 *
 * @param csvParser parser supplying the header map and the records
 * @param label node label; upper-cased and placed into the query text
 * @param insightsTimeField header name that must be present in the file
 * @param insightsTimeFormat passed through to {@code getCSVRecordDetails}
 * @return boolean {@code true} when the records were handed to the database
 * @throws InsightsCustomException if a header is empty, the time field is missing, or any other
 *         exception occurs; only the message of the original exception is carried over
 */
private boolean parseCsvRecords(CSVParser csvParser, String label, String insightsTimeField,
		String insightsTimeFormat) throws InsightsCustomException {
	List<JsonObject> nodes = new ArrayList<>();
	String createQuery = "UNWIND {props} AS properties CREATE (n:" + label.toUpperCase()
			+ ") SET n = properties";
	Map<String, Integer> headers = csvParser.getHeaderMap();
	try {
		// Validate the header first; both failures are rewrapped by the catch below.
		if (headers.containsKey("")) {
			throw new InsightsCustomException("Error in file.");
		}
		if (!headers.containsKey(insightsTimeField)) {
			throw new InsightsCustomException("Insights Time Field not present in csv file");
		}
		for (CSVRecord row : csvParser.getRecords()) {
			nodes.add(getCSVRecordDetails(row, headers, insightsTimeField, insightsTimeFormat));
		}
		insertDataInDatabase(nodes, createQuery);
		return true;
	} catch (Exception ex) {
		log.error("Error while parsing the .CSV records. {} ", ex.getMessage());
		throw new InsightsCustomException(ex.getMessage());
	}
}
 
Example 21
Source Project: Insights   Source File: DataProcessorUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts every CSV record with {@code getHierachyDetails}, stamps it with a metadata id and a
 * creation date, and bulk-creates the resulting nodes through the database handler.
 *
 * @param status value returned when the database response reports errors
 * @param csvParser parser supplying the records
 * @param dbHandler handler used to read current records and create the nodes
 * @param headerMap header name to column index mapping passed to {@code getHierachyDetails}
 * @param query query passed to {@code bulkCreateNodes}
 * @return {@code true} when the response has no errors, otherwise {@code status}
 */
private boolean parseCsvRecords(boolean status, CSVParser csvParser, Neo4jDBHandler dbHandler,
		Map<String, Integer> headerMap, String query)
		throws IOException, GraphDBException, InsightsCustomException {
	List<JsonObject> nodes = new ArrayList<>();
	List<String> combo = new ArrayList<>();
	getCurrentRecords(combo, dbHandler);

	int rowNumber = 0;
	for (CSVRecord row : csvParser.getRecords()) {
		JsonObject node = getHierachyDetails(row, headerMap);
		rowNumber++;
		// The id is the nanosecond part of the current instant plus the 1-based row number.
		node.addProperty(DatataggingConstants.METADATA_ID, Instant.now().getNano() + rowNumber);
		node.addProperty(DatataggingConstants.CREATIONDATE, Instant.now().toEpochMilli());
		nodes.add(node);
		updateComboList(combo, node);
	}

	JsonObject graphResponse = dbHandler.bulkCreateNodes(nodes, null, query);
	boolean hasErrors = graphResponse.get(DatataggingConstants.RESPONSE).getAsJsonObject()
			.get(DatataggingConstants.ERRORS).getAsJsonArray().size() > 0;
	if (!hasErrors) {
		return true;
	}
	log.error(graphResponse);
	return status;
}
 
Example 22
Source Project: fullstop   Source File: CredentialReportCSVParserImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the content of a credential report into report entries.
 * The report format must be {@code Textcsv} and the CSV header must contain the columns
 * checked below; records for which {@code toCSVReportEntry} yields {@code null} are dropped.
 *
 * @param report the credential report to parse
 * @return the entries built from the report's CSV records
 * @throws RuntimeException if the report content cannot be read
 */
@Override
public List<CSVReportEntry> apply(final GetCredentialReportResult report) {
    Assert.state(Textcsv.toString().equals(report.getReportFormat()), "unknown credential report format: " + report.getReportFormat());

    try (final Reader content = new BufferedReader(new InputStreamReader(new ByteBufferBackedInputStream(report.getContent())));
         final CSVParser csv = new CSVParser(content, CSV_FORMAT)) {
        final Map<String, Integer> headerColumns = csv.getHeaderMap();

        // Checked in this order so the first missing column is the one reported.
        final String[] requiredHeaders = {
            "user", "arn", "password_enabled", "mfa_active", "access_key_1_active", "access_key_2_active"
        };
        for (final String required : requiredHeaders) {
            Assert.state(headerColumns.containsKey(required), "Header '" + required + "' not found in CSV");
        }

        return stream(csv.spliterator(), false)
                .map(this::toCSVReportEntry)
                .filter(Objects::nonNull)
                .collect(toList());
    } catch (final IOException e) {
        throw new RuntimeException("Could not read csv report", e);
    }
}
 
Example 23
Source Project: find   Source File: PlatformDataExportServiceIT.java    License: MIT License 6 votes vote down vote up
/**
 * Exports query results as CSV and checks the header record plus the first data record:
 * the first two header cells name the reference and database columns, and the matching cells of
 * the first data record are not blank.
 */
@Test
public void exportToCsv() throws E, IOException {
    final R queryRequest = queryRequestBuilderFactory.getObject()
            .queryRestrictions(testUtils.buildQueryRestrictions())
            .queryType(QueryRequest.QueryType.MODIFIED)
            .build();

    final ByteArrayOutputStream exported = new ByteArrayOutputStream();
    exportService.exportQueryResults(exported, queryRequest, ExportFormat.CSV, Collections.emptyList(), 1001L);
    final String csvText = exported.toString();
    assertNotNull(csvText);

    try (final CSVParser parser = CSVParser.parse(csvText, CSVFormat.EXCEL)) {
        final List<CSVRecord> rows = parser.getRecords();
        assertThat(rows, not(empty()));

        final CSVRecord header = rows.get(0);
        // endsWith rather than equals: a byte-order mark may precede the first header cell
        assertThat(header.get(0), endsWith("Reference"));
        assertEquals("Database", header.get(1));

        final CSVRecord firstData = rows.get(1);
        final String reference = firstData.get(0);
        assertNotNull(reference);
        assertFalse(reference.trim().isEmpty());
        final String database = firstData.get(1);
        assertFalse(database.trim().isEmpty());
    }
}
 
Example 24
Source Project: nifi   Source File: CSVHeaderSchemaStrategy.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Derives a schema from the header of the CSV content: every header column becomes a nullable
 * string field.
 *
 * @param variables variables used when creating the CSV format
 * @param contentStream the CSV content; a leading byte-order mark is handled by the wrapping stream
 * @param readSchema not used by this strategy
 * @return a schema with one string field per header column
 * @throws SchemaNotFoundException if no context is available or the header cannot be read
 */
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream, final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException("Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        final CSVFormat headerFormat = CSVUtils.createCSVFormat(context, variables).withFirstRecordAsHeader();
        try (final Reader content = new InputStreamReader(new BOMInputStream(contentStream));
            final CSVParser parser = new CSVParser(content, headerFormat)) {

            final List<RecordField> headerFields = new ArrayList<>();
            parser.getHeaderMap().keySet().forEach(
                name -> headerFields.add(new RecordField(name, RecordFieldType.STRING.getDataType(), true)));

            return new SimpleRecordSchema(headerFields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}
 
Example 25
Source Project: marklogic-contentpump   Source File: DelimitedTextReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the file of the given split and prepares the CSV parser and its iterator.
 * Returns without creating a parser when the file cannot be opened. When no URI column name is
 * set, either an id generator or a zero-based URI counter is prepared, depending on
 * configuration.
 *
 * @param inSplit the split to read
 * @throws IOException if the file or parser cannot be opened
 * @throws InterruptedException declared for callers; not thrown directly by this method
 */
protected void initParser(InputSplit inSplit) throws IOException,
    InterruptedException {
    fileIn = openFile(inSplit, true);
    if (fileIn == null) {
        return;
    }
    instream = new InputStreamReader(fileIn, encoding);

    bytesRead = 0;
    fileLen = inSplit.getLength();

    final boolean noUriColumn = (uriName == null);
    if (noUriColumn) {
        generateId = conf.getBoolean(CONF_INPUT_GENERATE_URI, false);
        if (!generateId) {
            uriId = 0;
        } else {
            // The generator is seeded with the file path and the start offset of this split.
            final String idPrefix = file.toUri().getPath() + "-"
                + ((FileSplit) inSplit).getStart();
            idGen = new IdGenerator(idPrefix);
        }
    }

    parser = new CSVParser(instream,
        CSVParserFormatter.getFormat(delimiter, encapsulator, true, true));
    parserIterator = parser.iterator();
}
 
Example 26
Source Project: TomboloDigitalConnector   Source File: DataExportEngineTest.java    License: MIT License 6 votes vote down vote up
/**
 * Runs a CSV export for a single subject with one fraction-of-total field and checks that the
 * output holds exactly one record with the expected label and field value.
 */
@Test
public void testExportsCSV() throws Exception {
    DataExportSpecificationBuilder csvBuilder = DataExportSpecificationBuilder.withCSVExporter();
    csvBuilder
            .addSubjectSpecification(
                    new SubjectSpecificationBuilder(AbstractONSImporter.PROVIDER.getLabel(), "lsoa")
                            .setMatcher("label", "E01002766"))
            .addDatasourceSpecification("uk.org.tombolo.importer.ons.CensusImporter", "qs103ew", "")
            .addFieldSpecification(
                    FieldBuilder.fractionOfTotal("percentage_under_1_years_old_label")
                            // dividend: number under one year old
                            .addDividendAttribute("uk.gov.ons", "Age: Age under 1")
                            // divisor: total population
                            .setDivisorAttribute("uk.gov.ons", "Age: All categories: Age"));

    engine.execute(csvBuilder.build(), writer, emptyImporterMatcher);

    String exportedCsv = writer.toString();
    List<CSVRecord> records = CSVParser.parse(exportedCsv, CSVFormat.DEFAULT.withHeader()).getRecords();

    assertEquals(1, records.size());
    CSVRecord onlyRecord = records.get(0);
    assertEquals("E01002766", onlyRecord.get("label"));
    assertEquals("0.012263099219620958", onlyRecord.get("percentage_under_1_years_old_label"));
}
 
Example 27
/**
 * Imports timestream metadata from the CSV file named by {@code conf.getMetaFile()} into the
 * database's meta table, printing every stored entry to standard output.
 *
 * <p>The column names are supplied explicitly (matched case-insensitively, values trimmed). The
 * first three columns form the identifier, the two date columns become attributes, and the
 * remaining columns are stored as a JSON list of {@code name=value} strings under
 * {@code extension}.
 *
 * @throws IOException if the file cannot be opened or parsed
 */
public void importMeta() throws IOException {
    TimestreamMetaTable metaTable = db.metaTable();
    // Columns, in order: category, name, supervision number, acceptance number, production date,
    // validity date, registration category, application type, company name, task type.
    String [] fileHeader = {"分类", "名称", "监管号", "受理号", "生产日期", "有效日期", "注册分类", "申请类型", "企业名称", "任务类型"};
    String csvFile = conf.getMetaFile();
    CSVFormat format = CSVFormat.DEFAULT.withHeader(fileHeader).withIgnoreHeaderCase().withTrim();
    // One serializer for all records instead of a new one per row.
    Gson gson = new Gson();
    // try-with-resources closes the file handle even when a record fails to import.
    try (Reader reader = Files.newBufferedReader(Paths.get(csvFile));
            CSVParser csvParser = new CSVParser(reader, format)) {
        for (CSVRecord r : csvParser.getRecords()) {
            TimestreamIdentifier identifier = new TimestreamIdentifier.Builder(r.get("分类"))
                    .addTag("名称", r.get("名称"))
                    .addTag("监管号", r.get("监管号"))
                    .build();
            TimestreamMeta meta = new TimestreamMeta(identifier);

            meta.addAttribute("produced_date", r.get("生产日期"));
            meta.addAttribute("period_of_validity", r.get("有效日期"));

            List<String> extension = new ArrayList<>();
            extension.add("受理号=" + r.get("受理号"));
            extension.add("注册分类=" + r.get("注册分类"));
            extension.add("申请类型=" + r.get("申请类型"));
            extension.add("企业名称=" + r.get("企业名称"));
            extension.add("任务类型=" + r.get("任务类型"));
            meta.addAttribute("extension", gson.toJson(extension));

            metaTable.put(meta);
            System.out.println(meta.toString());
        }
    }
}
 
Example 28
/**
 * Creates a mapper from the tumor location mapping CSV, whose first record is the header.
 * Each record maps the {@code primaryTumorLocation} cell to the set produced by {@code toSet}
 * from the {@code doids} cell; a later record with the same location replaces the earlier one.
 *
 * @return a mapper backed by the parsed mapping
 * @throws IOException if the mapping resource cannot be read or parsed
 */
@NotNull
static PrimaryTumorToDOIDMapper createFromResource() throws IOException {
    Map<String, Set<String>> doidsPerPrimaryTumor = Maps.newHashMap();
    // try-with-resources releases the underlying resource once all records are read.
    try (final CSVParser parser = CSVParser.parse(TUMOR_LOCATION_MAPPING_CSV, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
        for (final CSVRecord record : parser) {
            final String primaryTumorLocation = record.get("primaryTumorLocation");
            final String doids = record.get("doids");

            doidsPerPrimaryTumor.put(primaryTumorLocation, toSet(doids));
        }
    }

    return new PrimaryTumorToDOIDMapper(doidsPerPrimaryTumor);
}
 
Example 29
Source Project: macrobase   Source File: SchemaResource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the schema for the request's base query.
 *
 * <p>A base query containing {@code csv://} is treated as a CSV file path: the header names of
 * that file become the schema columns, each with type {@code "entry"}. Any other query is
 * resolved through the SQL ingester. Failures are logged and reported in
 * {@code errorMessage} as a stack trace rather than thrown.
 *
 * @param request carries the base query and, for SQL, the database URL
 * @return the response holding either the schema or an error message
 */
@PUT
@Consumes(MediaType.APPLICATION_JSON)
public SchemaResponse getSchema(SchemaRequest request) {
    SchemaResponse response = new SchemaResponse();

    if (request.baseQuery == null) {
        response.errorMessage = "Please enter a base query";
        return response;
    }

    try {
        // temp hack to enable CSV loading
        if(request.baseQuery.contains("csv://")) {
            // NOTE(review): the path comes straight from the request and is opened unchecked -
            // confirm callers are trusted or restrict it to an allowed directory.
            File csvFile = new File(request.baseQuery.replace("csv://", ""));

            Schema s = new Schema(new ArrayList<>());
            // Only the header is needed; close the parser so the file handle is released.
            try (CSVParser p = CSVParser.parse(csvFile, Charset.defaultCharset(), CSVFormat.DEFAULT.withHeader())) {
                for(String header : p.getHeaderMap().keySet()) {
                    s.getColumns().add(new Schema.SchemaColumn(header, "entry"));
                }
            }
            response.schema = s;
        } else {
            conf.set(MacroBaseConf.DB_URL, request.pgUrl);
            conf.set(MacroBaseConf.BASE_QUERY, request.baseQuery);
            response.schema = ((SQLIngester) getLoader()).getSchema(request.baseQuery);
        }
    } catch (Exception e) {
        log.error("An error occurred while processing a request:", e);
        response.errorMessage = ExceptionUtils.getStackTrace(e);
    }

    return response;
}
 
Example 30
Source Project: constellation   Source File: HashmodCSVImportFileParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the whole input as RFC 4180 CSV (UTF-8) and returns every record, header included, as
 * an array of its cell values.
 *
 * @param input source providing the CSV input stream
 * @param parameters plugin parameters; not used by this method
 * @return one {@code String[]} per record, in input order
 * @throws IOException if the stream cannot be read or parsed
 */
public List<String[]> parse(final HashmodInputSource input, final PluginParameters parameters) throws IOException {
    final ArrayList<String[]> rows = new ArrayList<>();
    try (final CSVParser parser = CSVFormat.RFC4180.parse(new InputStreamReader(input.getInputStream(), StandardCharsets.UTF_8.name()))) {
        for (final CSVRecord row : parser.getRecords()) {
            final String[] cells = new String[row.size()];
            int next = 0;
            // A record iterates over its values in column order.
            for (final String cell : row) {
                cells[next] = cell;
                next++;
            }
            rows.add(cells);
        }
    }
    return rows;
}