com.opencsv.CSVParser Java Examples

The following examples show how to use com.opencsv.CSVParser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CsvReaderExamples.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Reads a comma-separated source record by record and collects every record.
 *
 * <p>Quotation characters are ignored by the parser and no leading lines are skipped.
 * Any failure while building the reader, reading, or closing is handed to
 * {@code Helpers.err}; the records read up to that point are still returned.
 *
 * @param reader the character source to parse; it is closed before this method returns
 * @return the records read, in file order (possibly empty, never {@code null})
 */
public static List<String[]> oneByOne(Reader reader) {
    List<String[]> list = new ArrayList<>();
    // try-with-resources releases both readers even when readNext() throws;
    // the original only closed them on the success path.
    try (Reader source = reader;
         CSVReader csvReader = new CSVReaderBuilder(source)
                 .withSkipLines(0)
                 .withCSVParser(new CSVParserBuilder()
                         .withSeparator(',')
                         .withIgnoreQuotations(true)
                         .build())
                 .build()) {
        String[] line;
        while ((line = csvReader.readNext()) != null) {
            list.add(line);
        }
    } catch (Exception ex) {
        Helpers.err(ex);
    }
    return list;
}
 
Example #2
Source File: AbstractNiFiCommand.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves a comma-separated list of user-group identities to tenant entities.
 *
 * @param groups         one CSV line holding the group identities to resolve
 * @param existingGroups the known user groups to search
 * @return the tenant entities, in the order the identities appear in {@code groups}
 * @throws IOException      if the CSV line cannot be parsed
 * @throws CommandException if an identity matches none of the existing groups
 */
protected static Set<TenantEntity> generateTenantEntities(final String groups, final UserGroupsEntity existingGroups)
    throws IOException, CommandException {
    final String[] identities = new CSVParser().parseLine(groups);
    final Set<TenantEntity> resolved = new LinkedHashSet<>();

    for (final String identity : identities) {
        // Fail on the first identity that has no matching group.
        final UserGroupEntity match = existingGroups.getUserGroups().stream()
            .filter(candidate -> identity.equals(candidate.getComponent().getIdentity()))
            .findAny()
            .orElseThrow(() -> new CommandException("User group with the identity '" + identity + "' not found."));

        resolved.add(createTenantEntity(match.getId(), identity));
    }

    return resolved;
}
 
Example #3
Source File: AbstractNiFiCommand.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves a comma-separated list of user identities to tenant entities.
 *
 * @param users         one CSV line holding the user identities to resolve
 * @param existingUsers the known users to search
 * @return the tenant entities, in the order the identities appear in {@code users}
 * @throws IOException      if the CSV line cannot be parsed
 * @throws CommandException if an identity matches none of the existing users
 */
protected static Set<TenantEntity> generateTenantEntities(final String users, final UsersEntity existingUsers)
    throws IOException, CommandException {
    final String[] identities = new CSVParser().parseLine(users);
    final Set<TenantEntity> resolved = new LinkedHashSet<>();

    for (final String identity : identities) {
        // Fail on the first identity that has no matching user.
        final UserEntity match = existingUsers.getUsers().stream()
            .filter(candidate -> identity.equals(candidate.getComponent().getIdentity()))
            .findAny()
            .orElseThrow(() -> new CommandException("User with the identity '" + identity + "' not found."));

        resolved.add(createTenantEntity(match.getId(), identity));
    }

    return resolved;
}
 
Example #4
Source File: CsvToJsonConverterV2Test.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an 11-column row against the 11-field schema with a custom ordering that
 * contains {@code -1} in the third position, and compares the result with the
 * {@code 11_fields_with_null.json} fixture.
 */
public void convertOutputAddingNull() throws IOException, DataConversionException {
  JsonParser json = new JsonParser();
  JsonArray schema = json
      .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/schema_with_11_fields.json")))
      .getAsJsonArray();
  String[] record = new CSVParser().parseLine(row11Cols);

  CsvToJsonConverterV2 converter = new CsvToJsonConverterV2();
  WorkUnitState state = new WorkUnitState();
  state.setProp(CsvToJsonConverterV2.CUSTOM_ORDERING, "0,1,-1,3,4,5,6,7,8,9,10");
  converter.init(state);

  // Only the first converted record is inspected.
  JsonObject actual = converter.convertRecord(schema, record, state).iterator().next();
  JsonElement expectedJson =
      json.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/11_fields_with_null.json")));
  JsonObject expected = expectedJson.getAsJsonObject();

  Assert.assertEquals(expected, actual);
  converter.close();
}
 
Example #5
Source File: CsvToJsonConverterV2Test.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds an 11-column row to a converter using the 10-field schema and expects
 * {@code createOutput} to throw.
 */
public void convertOutputMismatchFields() throws IOException {
  JsonParser json = new JsonParser();
  JsonArray schema = json
      .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/schema_with_10_fields.json")))
      .getAsJsonArray();
  String[] record = new CSVParser().parseLine(row11Cols);

  CsvToJsonConverterV2 converter = new CsvToJsonConverterV2();
  try {
    converter.createOutput(schema, record);
    // Reaching this line means no exception was raised for the mismatched input.
    Assert.fail();
  } catch (Exception expected) {
    // Intentionally empty: an exception is the outcome this test is looking for.
  }
  converter.close();
}
 
Example #6
Source File: CsvToJsonConverterV2Test.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an 11-column row against the 10-field schema with a custom ordering that
 * omits index 2, and compares the result with the {@code 10_fields.json} fixture.
 */
public void convertOutputSkippingField() throws IOException, DataConversionException {
  JsonParser json = new JsonParser();
  JsonArray schema = json
      .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/schema_with_10_fields.json")))
      .getAsJsonArray();
  String[] record = new CSVParser().parseLine(row11Cols);

  CsvToJsonConverterV2 converter = new CsvToJsonConverterV2();
  WorkUnitState state = new WorkUnitState();
  state.setProp(CsvToJsonConverterV2.CUSTOM_ORDERING, "0,1,3,4,5,6,7,8,9,10");
  converter.init(state);

  // Only the first converted record is inspected.
  JsonObject actual = converter.convertRecord(schema, record, state).iterator().next();
  JsonElement expectedJson =
      json.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/10_fields.json")));
  JsonObject expected = expectedJson.getAsJsonObject();

  Assert.assertEquals(expected, actual);
  converter.close();
}
 
Example #7
Source File: CsvToJsonConverterV2Test.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a 10-column row against the 10-field schema using a default work-unit state
 * and compares the result with the {@code 10_fields.json} fixture.
 */
public void convertOutput() throws IOException {
  JsonParser json = new JsonParser();
  JsonArray schema = json
      .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/schema_with_10_fields.json")))
      .getAsJsonArray();
  String[] record = new CSVParser().parseLine(row10Cols);

  CsvToJsonConverterV2 converter = new CsvToJsonConverterV2();
  converter.init(new WorkUnitState());

  JsonObject actual = converter.createOutput(schema, record);
  JsonElement expectedJson =
      json.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/csv/10_fields.json")));
  JsonObject expected = expectedJson.getAsJsonObject();

  Assert.assertEquals(expected, actual);
  converter.close();
}
 
Example #8
Source File: CsvReaderExamples.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Reads every record of a comma-separated source in a single call.
 *
 * <p>Quotation characters are ignored by the parser and no leading lines are skipped.
 * A failure while reading or closing is handed to {@code Helpers.err} and an empty list
 * is returned.
 *
 * @param reader the character source to parse; it is closed before this method returns
 * @return all records, or an empty list if reading failed
 */
public static List<String[]> readAll(Reader reader) {

    CSVParser parser = new CSVParserBuilder()
            .withSeparator(',')
            .withIgnoreQuotations(true)
            .build();

    CSVReader csvReader = new CSVReaderBuilder(reader)
            .withSkipLines(0)
            .withCSVParser(parser)
            .build();

    List<String[]> list = new ArrayList<>();
    // try-with-resources closes both readers even when readAll() throws;
    // the original skipped both close() calls in that case.
    try (Reader source = reader; CSVReader openReader = csvReader) {
        list = openReader.readAll();
    } catch (Exception ex) {
        Helpers.err(ex);
    }
    return list;
}
 
Example #9
Source File: CsvContentExtractor.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first CSV record from the stream, uses the column at position
 * {@code contentColumn} (1-based) as the document text and passes every other column to
 * {@code addMetadata} together with its zero-based index.
 *
 * @throws IOException if the stream yields no record or the record has fewer than
 *     {@code contentColumn} columns
 */
@Override
public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException {
  super.doProcessStream(stream, source, jCas);

  // Only the first character of the configured separator is used.
  final char delimiter = separator.charAt(0);
  final CSVParser csvParser = new CSVParserBuilder().withSeparator(delimiter).build();

  try (CSVReader csvReader =
      new CSVReaderBuilder(new InputStreamReader(stream, StandardCharsets.UTF_8))
          .withCSVParser(csvParser)
          .build()) {
    final String[] fields = csvReader.readNext();
    if (fields == null || fields.length < contentColumn) {
      throw new IOException("Not enough columns");
    }

    for (int index = 0; index < fields.length; index++) {
      final String value = fields[index];
      if ((index + 1) != contentColumn) {
        addMetadata(jCas, index, value);
      } else {
        jCas.setDocumentText(value);
      }
    }
  }
}
 
Example #10
Source File: CSVConnector.java    From TAcharting with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Reads a csv file whose first line is a header and whose remaining lines are bar data
 * with dates in the {@link TimeFormatType#YAHOO} pattern. Unlike the connector's own
 * format there is no info line carrying name and time format id.
 *
 * <p>If the last bar read ends before the first one, the bars are reversed before the
 * series is built.
 *
 * @param name the name of this symbol; {@code null} becomes {@code "unnamed"}, any other
 *             value is upper-cased
 * @param file the csv file with financial data in yahoo format
 * @return the corresponding series, always created with currency USD
 * @throws IOException if the file cannot be read or does not contain a header line
 */
public TaBarSeries getSeriesFromYahooFile(String name, File file) throws IOException{
    List<Bar> bars = new ArrayList<>();
    // try-with-resources closes the file even when a row fails to parse.
    try (CSVReader reader = new CSVReaderBuilder(new FileReader(file)).withCSVParser(new CSVParser()).build()) {
        String[] line = reader.readNext();
        if (line == null) {
            // An empty file previously surfaced as a NullPointerException.
            throw new IOException("No header line found in " + file);
        }
        Map<Parameter.Columns, Integer> headers = FormatUtils.getHeaderMap(Arrays.asList(line));
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(TimeFormatType.YAHOO.pattern);
        while ((line = reader.readNext()) != null) {
            bars.add(FormatUtils.extractOHLCData(headers, formatter, line, false));
        }
    }
    // Guard against files with a header but no data rows before indexing into the list.
    if (!bars.isEmpty() && bars.get(bars.size() - 1).getEndTime().isBefore(bars.get(0).getEndTime())) {
        Collections.reverse(bars);
    }
    String yahooIntervall = YahooSettingsManager.getProperties().getProperty(Parameter.PROPERTY_YAHOO_INTERVAL);
    GeneralTimePeriod timePeriod = YahooTimePeriod.of(yahooIntervall).generalTimePeriod;
    return new TaBarSeries(name == null ? "unnamed" : name.toUpperCase(), bars, Currency.getInstance("USD"), timePeriod);
}
 
Example #11
Source File: OurAirportsAirportProvider.java    From MetarParser with MIT License 6 votes vote down vote up
/**
 * Downloads the airports list and rebuilds the {@code airports} map, keyed by the ICAO
 * code of each {@link Airport}. The first line of the download is skipped.
 *
 * @throws CsvValidationException when the parsing of the file fails
 * @throws IOException            when network error
 * @throws URISyntaxException     when the URI is invalid
 */
public void buildAirport() throws URISyntaxException, IOException, CsvValidationException {
    URI source = new URI(AIRPORT_URI);
    airports = new HashMap<>();
    try (InputStream download = source.toURL().openStream();
            CSVReader csv = new CSVReaderBuilder(new InputStreamReader(download, StandardCharsets.UTF_8))
                    .withCSVParser(new CSVParser())
                    .withSkipLines(1)
                    .build()) {
        for (String[] row = csv.readNext(); row != null; row = csv.readNext()) {
            Airport entry = new Airport();
            entry.setIcao(row[1]);
            entry.setName(row[3]);
            // Numeric columns that cannot be parsed fall back to 0.
            entry.setLatitude(NumberUtils.toDouble(row[4], 0));
            entry.setLongitude(NumberUtils.toDouble(row[5], 0));
            entry.setAltitude(NumberUtils.toInt(row[6], 0));
            entry.setCountry(countries.get(row[8]));
            entry.setCity(row[10]);
            entry.setIata(row[13]);
            airports.put(entry.getIcao(), entry);
        }
    }
}
 
Example #12
Source File: DefaultAirportProvider.java    From MetarParser with MIT License 6 votes vote down vote up
/**
 * Initiate airports map from {@code airportsFile}, keyed by ICAO code.
 *
 * <p>Every line is read (none skipped) and columns 1 to 10 are mapped onto an
 * {@link Airport}.
 *
 * @throws NullPointerException  if {@code airportsFile} is {@code null}
 * @throws IllegalStateException if the file cannot be read or parsed; the original
 *                               exception is attached as the cause
 */
private void initAirports() {
    Objects.requireNonNull(airportsFile);
    airports = new HashMap<>();
    String[] line;
    try (CSVReader reader = new CSVReaderBuilder(new InputStreamReader(airportsFile, StandardCharsets.UTF_8)).withCSVParser(new CSVParser()).withSkipLines(0).build()) {
        while ((line = reader.readNext()) != null) {
            Airport airport = new Airport();
            airport.setName(line[1]);
            airport.setCity(line[2]);
            airport.setCountry(countries.get(line[3]));
            airport.setIata(line[4]);
            airport.setIcao(line[5]);
            airport.setLatitude(Double.parseDouble(line[6]));
            airport.setLongitude(Double.parseDouble(line[7]));
            airport.setAltitude(Integer.parseInt(line[8]));
            airport.setTimezone(line[9]);
            airport.setDst(line[10]);
            airports.put(airport.getIcao(), airport);
        }
    } catch (IOException | CsvValidationException exception) {
        // Keep the cause so the stack trace of the underlying failure is not lost.
        throw new IllegalStateException(exception.getMessage(), exception);
    }
}
 
Example #13
Source File: TraitFileClean.java    From systemsgenetics with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects the first column of every data row in the phase-3 sample file (tab-separated,
 * header line skipped) and passes the resulting ids to the {@code trait},
 * {@code probeAnnotation} and {@code coupling} steps defined elsewhere in this class.
 *
 * @param args unused; all input locations are hard-coded
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    File phase3File = new File("C:\\Users\\Sophie Mulc\\Documents\\DEPICT2\\phase3_corrected.psam");
    File traitFile = new File("C:\\Users\\Sophie Mulc\\Documents\\DEPICT2\\TraitFile.txt");
    File probeAnnotationFile = new File("C:\\Users\\Sophie Mulc\\Documents\\DEPICT2\\ProbeAnnotationFile.txt");
    File couplingFile = new File("C:\\Users\\Sophie Mulc\\Documents\\DEPICT2\\CouplingFile.txt");

    final CSVParser gmtParser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

    List<String> iids = new ArrayList<>();

    // try-with-resources: the reader was previously never closed.
    try (CSVReader gmtReader = new CSVReaderBuilder(new BufferedReader(new FileReader(phase3File))).withSkipLines(1).withCSVParser(gmtParser).build()) {
        String[] inputLine;
        while ((inputLine = gmtReader.readNext()) != null) {
            iids.add(inputLine[0]);
        }
    }

    trait(iids, traitFile);
    probeAnnotation(probeAnnotationFile);
    coupling(iids, couplingFile);
}
 
Example #14
Source File: DatasetDescriptor.java    From akka-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the dataset file ({@code datasetPath + datasetName + datasetEnding}) with the
 * configured charset and returns a CSV reader set up with this descriptor's separator,
 * quote, escape, strict-quotes and leading-whitespace settings.
 *
 * <p>When {@code fileHasHeader} is set, one record is read and discarded before the
 * reader is returned. The caller is responsible for closing the returned reader.
 *
 * @return a reader positioned at the first data record
 * @throws IOException if the file cannot be opened or the header cannot be read
 */
public CSVReader createCSVReader() throws IOException {
	final Path datasetFile = Paths.get(this.datasetPath + this.datasetName + this.datasetEnding);

	final CSVParserBuilder parserSettings = new CSVParserBuilder()
			.withSeparator(this.valueSeparator)
			.withQuoteChar(this.valueQuote)
			.withEscapeChar(this.valueEscape)
			.withStrictQuotes(this.valueStrictQuotes)
			.withIgnoreLeadingWhiteSpace(this.valueIgnoreLeadingWhitespace)
			.withFieldAsNull(CSVReaderNullFieldIndicator.EMPTY_SEPARATORS);
	final CSVParser csvParser = parserSettings.build();

	final BufferedReader fileContent = Files.newBufferedReader(datasetFile, this.charset);
	final CSVReader csvReader = new CSVReaderBuilder(fileContent)
			.withCSVParser(csvParser)
			.build();

	if (this.fileHasHeader) {
		// Consume the header record so callers start at the data.
		csvReader.readNext();
	}

	return csvReader;
}
 
Example #15
Source File: CSVConnector.java    From TAcharting with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Loads the given csv file into {@code lines} and initialises name, time format id,
 * date formatter and currency from its first line (the info line), which is then removed
 * from {@code lines}.
 *
 * <p>Separator and quote characters come from the connector properties; only the first
 * character of each property value is used. The currency is taken from the third
 * info-line field when it is exactly three characters long after whitespace removal,
 * otherwise {@code Parameter.DEFAULT_CURRENCY} is used.
 *
 * @param resource the csv file to read
 * @return {@code true} if the file was read and the info line parsed; {@code false} if
 *         the file could not be read or has no info line with at least name and id
 */
@Override
  public boolean connect(File resource){
      String separator = properties.getProperty(Parameter.PROPERTY_CSV_SEPARATOR, ",");
      String quote = properties.getProperty(Parameter.PROPERTY_CSV_QUOTE, "\\\\");
      CSVParser parser = new CSVParserBuilder().withSeparator(separator.charAt(0)).withQuoteChar(quote.charAt(0)).build();
      try (CSVReader reader = new CSVReaderBuilder(new FileReader(resource)).withCSVParser(parser).build()) {
          lines = reader.readAll();
          // An empty file or a truncated info line used to escape as an unchecked
          // index exception; report it through the boolean result instead.
          if (lines.isEmpty() || lines.get(0).length < 2) {
              log.error("CSV file " + resource + " has no info line with name and time format id");
              return false;
          }
          String[] infoLine = lines.get(0);
          name = infoLine[0];
          id = FormatUtils.extractInteger(infoLine[1]);
          isDateTwoColumn = id == TimeFormatType.yyyy_MM_ddHmsz.id;
          dateTimeFormatter = FormatUtils.getDateTimeFormatter(id);
          String currencyString = null;
          if (infoLine.length > 2) {
              currencyString = infoLine[2].replaceAll("\\s", "");
          }
          if (currencyString == null || currencyString.length() != 3) {
              currencyString = Parameter.DEFAULT_CURRENCY;
          }
          currency = Currency.getInstance(currencyString);
          lines.remove(0); // remove InfoLine
      } catch (IOException e) {
          // FileNotFoundException is an IOException, so one handler covers both cases.
          log.error(e.getMessage());
          e.printStackTrace();
          return false;
      }
      return true;
  }
 
Example #16
Source File: SnowflakeIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the incoming CSV line into its fields and emits them as a string array.
 * The quote character is the first character of {@code CSV_QUOTE_CHAR}.
 *
 * @param c the process context supplying the line and receiving the parsed fields
 * @throws IOException if the line cannot be parsed
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  final String rawLine = c.element();
  final char quoteChar = CSV_QUOTE_CHAR.charAt(0);
  final CSVParser lineParser = new CSVParserBuilder().withQuoteChar(quoteChar).build();
  c.output(lineParser.parseLine(rawLine));
}
 
Example #17
Source File: OurAirportsAirportProvider.java    From MetarParser with MIT License 5 votes vote down vote up
/**
 * Downloads the countries list and rebuilds the {@code countries} map. Each record's
 * second column is the map key and its third column becomes the {@link Country} name.
 * The first line of the download is skipped.
 *
 * @throws CsvValidationException when the parsing of the file fails
 * @throws IOException            when network error
 * @throws URISyntaxException     when the URI is invalid
 */
public void buildCountries() throws URISyntaxException, IOException, CsvValidationException {
    countries = new HashMap<>();
    URI source = new URI(COUNTRIES_URI);
    try (InputStream download = source.toURL().openStream();
            CSVReader csv = new CSVReaderBuilder(new InputStreamReader(download, StandardCharsets.UTF_8))
                    .withCSVParser(new CSVParser())
                    .withSkipLines(1)
                    .build()) {
        for (String[] row = csv.readNext(); row != null; row = csv.readNext()) {
            Country country = new Country();
            country.setName(row[2]);
            countries.put(row[1], country);
        }
    }
}
 
Example #18
Source File: ConvertHpoToMatrix.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a tab-separated HPO annotation file (first line skipped) and groups gene ids by
 * the value in column 0.
 *
 * <p>For every row the value in column 2 is looked up in {@code ncbiToEnsgMap}; when that
 * yields nothing, the value in column 3 is looked up in {@code hgncToEnsgMap}. Rows for
 * which neither lookup succeeds are reported on {@code System.err} and skipped.
 *
 * @param hpoFile       the tab-separated input file
 * @param ncbiToEnsgMap mapping from the column-2 id to ENSG ids
 * @param hgncToEnsgMap fallback mapping from the column-3 id to ENSG ids
 * @return map from the column-0 term to the set of ENSG ids found for it
 * @throws Exception if the file cannot be read
 */
private static HashMap<String, HashSet<String>> readHpoFile(File hpoFile, HashMap<String, ArrayList<String>> ncbiToEnsgMap, HashMap<String, ArrayList<String>> hgncToEnsgMap) throws Exception {

		final CSVParser hpoParser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashMap<String, HashSet<String>> hpoToGenes = new HashMap<>();

		// try-with-resources: the reader was previously never closed.
		try (CSVReader hpoReader = new CSVReaderBuilder(new BufferedReader(new FileReader(hpoFile))).withSkipLines(1).withCSVParser(hpoParser).build()) {

			String[] nextLine;
			while ((nextLine = hpoReader.readNext()) != null) {
				String hpo = nextLine[0];
				String ncbiId = nextLine[2];
				String hgcnId = nextLine[3];
				ArrayList<String> ensgIds = ncbiToEnsgMap.get(ncbiId);
				if (ensgIds == null) {
					ensgIds = hgncToEnsgMap.get(hgcnId);
				}
				if (ensgIds == null) {
					System.err.println("Missing mapping for gene: " + ncbiId + " " + hgcnId);
				} else {

					HashSet<String> hpoGenes = hpoToGenes.get(hpo);
					if (hpoGenes == null) {
						hpoGenes = new HashSet<>();
						hpoToGenes.put(hpo, hpoGenes);
					}

					hpoGenes.addAll(ensgIds);

				}

			}

		}

		return hpoToGenes;

	}
 
Example #19
Source File: ConvertMyoclonusClustersToMatrix.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a tab-separated file whose rows hold a gene (column 0) and a cluster (column 1)
 * and groups the genes per cluster. No lines are skipped.
 *
 * <p>In addition to one entry per cluster, the result always contains the key
 * {@code "AllMyoclonus"} holding every gene read.
 *
 * @param hpoFile the tab-separated gene/cluster file
 * @return map from cluster name to the genes assigned to it
 * @throws Exception if the file cannot be read
 */
private static HashMap<String, HashSet<String>> readClusterFile(File hpoFile) throws Exception {

		final CSVParser hpoParser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashMap<String, HashSet<String>> hpoToGenes = new HashMap<>();

		HashSet<String> allMyoclonusGenes = new HashSet<>();
		hpoToGenes.put("AllMyoclonus", allMyoclonusGenes);

		// try-with-resources: the reader was previously never closed.
		try (CSVReader hpoReader = new CSVReaderBuilder(new BufferedReader(new FileReader(hpoFile))).withSkipLines(0).withCSVParser(hpoParser).build()) {

			String[] nextLine;
			while ((nextLine = hpoReader.readNext()) != null) {
				String gene = nextLine[0];
				String cluster = nextLine[1];

				HashSet<String> hpoGenes = hpoToGenes.get(cluster);
				if (hpoGenes == null) {
					hpoGenes = new HashSet<>();
					hpoToGenes.put(cluster, hpoGenes);
				}

				allMyoclonusGenes.add(gene);
				hpoGenes.add(gene);

			}

		}

		return hpoToGenes;

	}
 
Example #20
Source File: IoUtils.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the first column of every line of a tab-separated file. No lines are skipped.
 *
 * @param file the tab-separated annotation file
 * @return the first-column values in file order
 * @throws IOException if the file cannot be read
 */
public static final List<String> readMatrixAnnotations(File file) throws IOException {

        final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

        ArrayList<String> identifiers = new ArrayList<>();

        // try-with-resources: the reader was previously never closed.
        try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(file))).withCSVParser(parser).build()) {
            String[] nextLine;
            while ((nextLine = reader.readNext()) != null) {
                identifiers.add(nextLine[0]);
            }
        }

        return identifiers;
    }
 
Example #21
Source File: StateCodeMappingResourceLoader.java    From arcusplatform with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a map from state code to {@link SameState} out of a CSV stream whose first
 * column is the state code and whose second column is the state name.
 *
 * <p>Records with fewer than two columns are skipped silently; records whose trimmed
 * code or whose name is empty are skipped with a warning.
 *
 * @param is the CSV input
 * @return the parsed mapping, or {@code null} when no valid record was found
 * @throws IllegalStateException if reading or parsing fails
 */
@Override
public Map<String, SameState> parse(InputStream is) {
	LOGGER.debug("Parsing NWS SAME Code State Name Mapping input data file");

	final Map<String, SameState> statesByCode = new HashMap<String,SameState>();

	try (CSVReader csv = new CSVReader(new InputStreamReader(is), 0, new CSVParser())) {
		for (String[] record = csv.readNext(); record != null; record = csv.readNext()) {
			// Records with fewer than two columns are input errors and are ignored.
			if (record.length >= 2) {
				// The data file can carry whitespace around state codes; strip it on load.
				final String stateCode = record[0].trim();
				final String state = record[1];

				if (StringUtils.isEmpty(stateCode) || StringUtils.isEmpty(state)) {
					LOGGER.warn(
							"Invalid NWS SAME Code State Name mapping file record, null value(s) found while parsing input data file with values: State Code:{} and State Name:{}",
							stateCode, state);
				} else {
					statesByCode.put(stateCode, new SameState(stateCode, state));
				}
			}
		}
	} catch (Exception e) {
		LOGGER.warn("Error parsing NWS SAME Code State Name Mapping input data file", e);
		throw new IllegalStateException(e);
	}

	return statesByCode.isEmpty() ? null : statesByCode;
}
 
Example #22
Source File: ConvertTfDataUrmoToMatrix.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a tab-separated transcription-factor file (first line skipped) into a nested
 * map: database (column 0) to pathway (column 1) to the set of gene ids (column 3).
 *
 * <p>Rows whose first field starts with {@code '!'} are ignored.
 *
 * @param tfFile the tab-separated input file
 * @return database to pathway to gene-id set
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if the file cannot be read
 */
private static HashMap<String, HashMap<String, HashSet<String>>> loadTfData(File tfFile) throws FileNotFoundException, IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashMap<String, HashMap<String, HashSet<String>>> tfdatabasesPathwayToGenes = new HashMap<>();

		// try-with-resources: the reader was previously never closed.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(tfFile))).withSkipLines(1).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				if (nextLine[0].charAt(0) == '!') {
					continue;
				}

				String database = nextLine[0];
				String pathway = nextLine[1];
				String ensgId = nextLine[3];

				HashMap<String, HashSet<String>> pathwayToGenes = tfdatabasesPathwayToGenes.get(database);
				if (pathwayToGenes == null) {
					pathwayToGenes = new HashMap<>();
					tfdatabasesPathwayToGenes.put(database, pathwayToGenes);
				}

				HashSet<String> pathwayGenes = pathwayToGenes.get(pathway);
				if (pathwayGenes == null) {
					pathwayGenes = new HashSet<>();
					pathwayToGenes.put(pathway, pathwayGenes);
				}

				pathwayGenes.add(ensgId);

			}

		}

		return tfdatabasesPathwayToGenes;

	}
 
Example #23
Source File: AbstractNiFiCommand.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Turns a comma-separated list of ids into tenant entities, one per id.
 *
 * @param ids one CSV line holding the ids
 * @return the tenant entities in the order the ids appear, without duplicates
 * @throws IOException if the CSV line cannot be parsed
 */
protected static Set<TenantEntity> generateTenantEntities(final String ids)
    throws IOException {
    final String[] parsedIds = new CSVParser().parseLine(ids);
    final Set<TenantEntity> tenantEntities = new LinkedHashSet<>();
    for (final String id : parsedIds) {
        tenantEntities.add(AbstractNiFiCommand.createTenantEntity(id));
    }
    return tenantEntities;
}
 
Example #24
Source File: ConvertHpoToMatrixWith10ProcentRandom.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a tab-separated HPO annotation file (first line skipped) and groups gene ids by
 * the value in column 0.
 *
 * <p>For every row the value in column 2 is looked up in {@code ncbiToEnsgMap}; when that
 * yields nothing, the value in column 3 is looked up in {@code hgncToEnsgMap}. Rows for
 * which neither lookup succeeds are reported on {@code System.err} and skipped.
 *
 * @param hpoFile       the tab-separated input file
 * @param ncbiToEnsgMap mapping from the column-2 id to ENSG ids
 * @param hgncToEnsgMap fallback mapping from the column-3 id to ENSG ids
 * @return map from the column-0 term to the set of ENSG ids found for it
 * @throws Exception if the file cannot be read
 */
private static HashMap<String, HashSet<String>> readHpoFile(File hpoFile, HashMap<String, ArrayList<String>> ncbiToEnsgMap, HashMap<String, ArrayList<String>> hgncToEnsgMap) throws Exception {

		final CSVParser hpoParser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashMap<String, HashSet<String>> hpoToGenes = new HashMap<>();

		// try-with-resources: the reader was previously never closed.
		try (CSVReader hpoReader = new CSVReaderBuilder(new BufferedReader(new FileReader(hpoFile))).withSkipLines(1).withCSVParser(hpoParser).build()) {

			String[] nextLine;
			while ((nextLine = hpoReader.readNext()) != null) {
				String hpo = nextLine[0];
				String ncbiId = nextLine[2];
				String hgcnId = nextLine[3];
				ArrayList<String> ensgIds = ncbiToEnsgMap.get(ncbiId);
				if (ensgIds == null) {
					ensgIds = hgncToEnsgMap.get(hgcnId);
				}
				if (ensgIds == null) {
					System.err.println("Missing mapping for gene: " + ncbiId + " " + hgcnId);
				} else {

					HashSet<String> hpoGenes = hpoToGenes.get(hpo);
					if (hpoGenes == null) {
						hpoGenes = new HashSet<>();
						hpoToGenes.put(hpo, hpoGenes);
					}

					hpoGenes.addAll(ensgIds);

				}

			}

		}

		return hpoToGenes;

	}
 
Example #25
Source File: FilterPrioBasedOnMutatedGenes.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Collects the distinct values of column 8 from a tab-separated genotype file, skipping
 * the first line.
 *
 * @param genoFile the tab-separated genotype file
 * @return the set of column-8 values
 * @throws IOException if the file cannot be read
 */
private static HashSet<String> getMutatedGenes(File genoFile) throws IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashSet<String> genes = new HashSet<>();

		// try-with-resources closes the reader even when a row cannot be read;
		// the trailing close() call was skipped in that case.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(genoFile))).withSkipLines(1).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				genes.add(nextLine[8]);

			}

		}

		return genes;

	}
 
Example #26
Source File: ConvertMyoclonusClustersToMatrix.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the first column of every line of a tab-separated file, preserving file order.
 * No lines are skipped.
 *
 * @param geneOrderFile the tab-separated file listing one gene per line in column 0
 * @return the column-0 values in file order
 * @throws IOException if the file cannot be read
 */
private static ArrayList<String> readGenes(File geneOrderFile) throws IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		ArrayList<String> geneOrder = new ArrayList<>();

		// try-with-resources: the reader was previously never closed.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(geneOrderFile))).withSkipLines(0).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				geneOrder.add(nextLine[0]);

			}

		}

		return geneOrder;

	}
 
Example #27
Source File: InvestigateAucChildParent.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a tab-separated prediction file (first line skipped) and keeps the rows whose
 * column-4 value is at most 0.05, mapping column 0 to the numeric value of column 3.
 *
 * @param predictedHpoTermFile the tab-separated prediction file
 * @return map from the column-0 term to its column-3 value for the retained rows
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if the file cannot be read
 */
public static TObjectDoubleMap<String> readSignificantPredictedHpoTermFile(File predictedHpoTermFile) throws FileNotFoundException, IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		TObjectDoubleMap<String> hpos = new TObjectDoubleHashMap<>();

		// try-with-resources closes the reader even when a row fails to parse;
		// the trailing close() call was skipped in that case.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(predictedHpoTermFile))).withSkipLines(1).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				if (Double.parseDouble(nextLine[4]) <= 0.05) {
					hpos.put(nextLine[0], Double.parseDouble(nextLine[3]));
				}

			}

		}

		return hpos;

	}
 
Example #28
Source File: ImproveHpoPredictionBasedOnChildTerms.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the first column of every data row of a tab-separated prediction file, skipping
 * the first line and preserving file order.
 *
 * @param predictedHpoTermFile the tab-separated prediction file
 * @return the distinct column-0 values in order of first appearance
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if the file cannot be read
 */
public static LinkedHashSet<String> readPredictedHpoTermFile(File predictedHpoTermFile) throws FileNotFoundException, IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		LinkedHashSet<String> hpos = new LinkedHashSet<>();

		// try-with-resources closes the reader even when a row cannot be read;
		// the trailing close() call was skipped in that case.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(predictedHpoTermFile))).withSkipLines(1).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				hpos.add(nextLine[0]);

			}

		}

		return hpos;

	}
 
Example #29
Source File: HpoGenePrioritisation.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a tab-separated case/HPO file (first line skipped) and groups the column-1
 * values per column-0 value, keeping only rows whose column 5 is empty.
 *
 * @param caseHpoFile the tab-separated input file
 * @return map from the column-0 value to its column-1 values in order of first appearance
 * @throws FileNotFoundException if the file does not exist
 * @throws IOException           if the file cannot be read
 */
private static HashMap<String, LinkedHashSet<String>> loadCaseHpo(File caseHpoFile) throws FileNotFoundException, IOException {

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		HashMap<String, LinkedHashSet<String>> caseHpo = new HashMap<>();

		// try-with-resources: the reader was previously never closed.
		try (CSVReader reader = new CSVReaderBuilder(new BufferedReader(new FileReader(caseHpoFile))).withSkipLines(1).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = reader.readNext()) != null) {

				if (nextLine[5].isEmpty()) {

					LinkedHashSet<String> hpo = caseHpo.get(nextLine[0]);
					if (hpo == null) {
						hpo = new LinkedHashSet<>();
						caseHpo.put(nextLine[0], hpo);
					}
					hpo.add(nextLine[1]);

				}

			}

		}

		return caseHpo;

	}
 
Example #30
Source File: FilterPrioBasedOnMutatedGenes.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
	 * For every sample listed in the sample file, filters that sample's prioritisation
	 * file down to the genes returned by {@code getMutatedGenes} for its genotype file and
	 * writes the result to the ranking folder.
	 *
	 * <p>The sample file is tab-separated: column 0 is the sample name, column 1 a path
	 * whose file name (with {@code .txt} appended if missing) identifies the genotype file
	 * inside the genotype folder. The first line of each prioritisation file is always
	 * copied to the output; later lines are copied only when their column 1 is one of the
	 * mutated genes. All input and output locations are hard-coded below.
	 *
	 * @param args the command line arguments (unused)
	 */
	public static void main(String[] args) throws FileNotFoundException, IOException {
//
//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations\\samplesWithGeno.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations\\gavinRes\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations\\rankingCandidateGenes");

//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations3\\samplesWithGeno.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations\\gavinRes\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations3");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations3\\rankingCandidateGenes");
		
		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\extraUnsolved\\samplesWithGeno.txt");
		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\extraUnsolved\\gavinRes\\");
		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\extraUnsolved\\Prioritisations");
		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\extraUnsolved\\rankingCandidateGenes");

//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsDcm\\samplesWithGeno.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsDcm\\gavinRes\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsDcm");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsDcm\\rankingCandidateGenes");
//
//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations3\\samplesWithGeno.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\Prioritisations\\gavinRes\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsSpiked");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsSpiked\\rankingCandidateGenes");
//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioMieke\\samplesWithGeno.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioMieke\\Gavin\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioMieke\\Prioritisations");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioMieke\\");
//		
//		final File sampleFile = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioEdgar\\Prioritisations\\samples.txt");
//		final File genoFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioEdgar\\CandidateGenes\\");
//		final File prioFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioEdgar\\Prioritisations");
//		final File resultFolder = new File("C:\\UMCG\\Genetica\\Projects\\GeneNetwork\\BenchmarkSamples\\PrioritisationsCardioEdgar\\");
//
//		
		resultFolder.mkdirs();

		final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();

		// try-with-resources: the sample reader was previously never closed.
		try (CSVReader sampleFileReader = new CSVReaderBuilder(new BufferedReader(new FileReader(sampleFile))).withSkipLines(0).withCSVParser(parser).build()) {

			String[] nextLine;
			while ((nextLine = sampleFileReader.readNext()) != null) {

				String sample = nextLine[0];

				String genoSampleName = new File(nextLine[1]).getName();
				if (!genoSampleName.endsWith(".txt")) {
					genoSampleName += ".txt";
				}

				File genoFile = new File(genoFolder, genoSampleName);
				File prioFile = new File(prioFolder, sample + ".txt");
				File rankingFile = new File(resultFolder, sample + ".txt");

				System.out.println("------------------------------------------------------------------");
				System.out.println("Sample: " + sample);
				System.out.println("Geno: " + genoFile.getAbsolutePath());
				System.out.println("Prio: " + prioFile.getAbsolutePath());
				System.out.println("Ranking: " + rankingFile.getAbsolutePath());

				HashSet<String> genesWithMutation = getMutatedGenes(genoFile);

				// Resources are closed in reverse order (writer first, then reader), matching
				// the original explicit close() calls, and now also on failure.
				try (CSVReader prioFileReader = new CSVReaderBuilder(new BufferedReader(new FileReader(prioFile))).withSkipLines(0).withCSVParser(parser).build();
						CSVWriter writer = new CSVWriter(new FileWriter(rankingFile), '\t', '\0', '\0', "\n")) {

					// The first line is copied unconditionally.
					String[] outputLine = prioFileReader.readNext();
					writer.writeNext(outputLine);

					while ((outputLine = prioFileReader.readNext()) != null) {

						if (genesWithMutation.contains(outputLine[1])) {
							writer.writeNext(outputLine);
						}

					}

				}

			}

		}

	}