Java Code Examples for org.apache.commons.csv.CSVFormat#newFormat()

The following examples show how to use org.apache.commons.csv.CSVFormat#newFormat(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MovieDataConfigurer.java    From jstarcraft-example with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the movie items from the pipe-delimited file {@code data/ml-100k/u.item}.
 *
 * <p>For every record, column 0 is parsed as the numeric item identifier and mapped to an
 * index through the {@code "item"} quality attribute of the given data space, column 1 is
 * used as the title, and column 2 is parsed as the date with {@code formatter} (declared
 * outside this method). An empty date column falls back to 1970-01-01.
 *
 * @param movieDataSpace data space supplying the {@code "item"} quality attribute
 * @return a list with one {@code MovieItem} per record, in file order
 * @throws Exception if the file cannot be read or a record cannot be parsed
 */
@Bean("movieItems")
List<MovieItem> getItems(DataSpace movieDataSpace) throws Exception {
    File movieItemFile = new File("data/ml-100k/u.item");
    // Collect straight into an ArrayList: the previous LinkedList was copied into an
    // ArrayList before returning anyway, so the intermediate list was pure overhead.
    List<MovieItem> items = new ArrayList<>();

    QualityAttribute<Integer> itemAttribute = movieDataSpace.getQualityAttribute("item");
    // A single try-with-resources closes the parser and every underlying stream in reverse order.
    try (InputStream stream = new FileInputStream(movieItemFile);
            InputStreamReader reader = new InputStreamReader(stream, StringUtility.CHARSET);
            BufferedReader buffer = new BufferedReader(reader);
            CSVParser parser = new CSVParser(buffer, CSVFormat.newFormat('|'))) {
        for (CSVRecord record : parser) {
            // Item identifier
            int id = Integer.parseInt(record.get(0));
            // Item index
            int index = itemAttribute.convertData(id);
            // Item title
            String title = record.get(1);
            // Item date; read the column once and default to the epoch when it is empty
            String rawDate = record.get(2);
            LocalDate date = StringUtility.isEmpty(rawDate) ? LocalDate.of(1970, 1, 1) : LocalDate.parse(rawDate, formatter);
            items.add(new MovieItem(index, title, date));
        }
    }

    return items;
}
 
Example 2
Source File: CSVParserFormatter.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link CSVFormat} for the given delimiter and applies the supplied parsing
 * options. Missing column names are always allowed.
 *
 * @param delimiter character that separates values
 * @param encapsulator quote character handed to {@code withQuote}; per the Commons CSV
 *        documentation a {@code null} value disables quoting
 * @param ignoreSurroundingSpaces value passed to {@code withIgnoreSurroundingSpaces}
 * @param ignoreEmptyLines value passed to {@code withIgnoreEmptyLines}
 * @return the configured format
 */
public static CSVFormat getFormat(char delimiter,
		Character encapsulator,
		boolean ignoreSurroundingSpaces,
		boolean ignoreEmptyLines) {
	// Each with* call returns a new format instance, so the chain can be returned directly.
	return CSVFormat.newFormat(delimiter)
			.withIgnoreEmptyLines(ignoreEmptyLines)
			.withIgnoreSurroundingSpaces(ignoreSurroundingSpaces)
			.withAllowMissingColumnNames(true)
			.withQuote(encapsulator);
}
 
Example 3
Source File: FileSourceUserGroupBuilder.java    From ranger with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a delimited text file that maps users to groups.
 *
 * <p>The first character of the configured file-source delimiter separates the fields.
 * In each record the first field is the user and every following non-empty field is one
 * of that user's groups. A single leading and/or trailing double quote is stripped from
 * each value. If the same user appears in several records, the last record wins.
 *
 * @param textFile file to read
 * @return map from user name to the list of that user's groups
 * @throws Exception if the file cannot be opened, read or parsed
 */
public Map<String, List<String>> readTextFile(File textFile) throws Exception {

	Map<String, List<String>> ret = new HashMap<String, List<String>>();

	String delimiter = config.getUserSyncFileSourceDelimiter();

	CSVFormat csvFormat = CSVFormat.newFormat(delimiter.charAt(0));

	// try-with-resources guarantees the reader and parser are closed even when parsing
	// or record handling throws; previously close() was only reached on success.
	try (BufferedReader reader = new BufferedReader(new FileReader(textFile));
			CSVParser csvParser = new CSVParser(reader, csvFormat)) {

		// getRecords() returns a (possibly empty) list, so no null check is needed.
		for (CSVRecord csvRecord : csvParser.getRecords()) {
			List<String> groups = new ArrayList<String>();

			// Strip one surrounding double quote on either side of the user name.
			String user = csvRecord.get(0).replaceAll("^\"|\"$", "");

			int fieldCount = csvRecord.size();

			for (int j = 1; j < fieldCount; j++) {
				String group = csvRecord.get(j);
				if (group != null && !group.isEmpty()) {
					groups.add(group.replaceAll("^\"|\"$", ""));
				}
			}
			ret.put(user, groups);
		}
	}

	return ret;
}
 
Example 4
Source File: Utils.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a tab delimited file column-wise. Needed for reading the counts
 * file, as there the rows are the samples, as opposed to the expression and
 * genotype files where the columns are the samples. The whole file is held
 * in memory, so the minimal memory requirement is larger than the size of
 * the counts file.
 *
 * The first row is treated as the header; empty header cells are skipped, so
 * both an R styled header (no cell above the row names) and a csv styled
 * header (leading tab) are accepted. Every following row must contain
 * exactly one row name followed by one value per header column.
 *
 * @param filepath The path to a tab delimited file to read
 *
 * @return A two element array: element 0 is a {@code List<String>} with the
 * 		   first column (without header), element 1 is a
 * 		   {@code List<List<String>>} with the remaining columns as filled by
 * 		   {@code addSingleValueTo2DArray}
 *
 * @throws IOException	If file at filepath can not be read
 * @throws RuntimeException If a data row does not have the same number of
 * 		   columns as the header
 */
public static Object[] readTabDelimitedColumns(String filepath) throws IOException {
	List<List<String>> allColumns = new ArrayList<List<String>>();
	List<String> firstColumn = new ArrayList<String>();
	// try-with-resources closes the reader and parser on every exit path, including
	// exceptions thrown while iterating; previously only the mismatch path closed it
	try (FileReader reader = new FileReader(filepath);
			CSVParser parser = new CSVParser(reader, CSVFormat.newFormat('\t'))) {
		boolean header = true;
		int rowNumber = 0;
		// number of non-empty header cells seen, i.e. the expected number of value columns
		int columnIndexHeader = 0;
		for (CSVRecord row : parser) {
			rowNumber++;
			if (header) {
				// header can start from 0 if it is R styled, so skip cells without a value
				// R style is e.g.
				// colNameA	colNameB
				// rowNameA	AAValue	AAvalue
				// rownameB ABValue BAvalue
				// while csv style has a tab before colNameA
				for (int columnIndex = 0; columnIndex < row.size(); columnIndex++) {
					String columnValue = row.get(columnIndex);
					if (columnValue.length() == 0) {
						continue;
					}
					allColumns = addSingleValueTo2DArray(allColumns, columnIndexHeader, columnValue);
					columnIndexHeader++;
				}
				header = false;
				continue;
			}
			// data rows start at 1 because the 1st element of the row is the samplename
			for (int columnIndex = 1; columnIndex < row.size(); columnIndex++) {
				allColumns = addSingleValueTo2DArray(allColumns, columnIndex - 1, row.get(columnIndex));
			}
			firstColumn.add(row.get(0));
			if (row.size() - 1 != columnIndexHeader) {
				DeconvolutionLogger.log.info(String.format("Table %s does not have the same number of columns as there are in the header at row %d",filepath,rowNumber));
				DeconvolutionLogger.log.info(String.format("Number of header columns: %d",columnIndexHeader));
				DeconvolutionLogger.log.info(String.format("Number of columns at row %d: %d", rowNumber, row.size()-1));
				DeconvolutionLogger.log.info(row.toString());
				throw new RuntimeException(String.format("Cellcount percentage table does not have the same number of columns as there are celltypes at row %d",rowNumber));
			}
		}
	}
	return new Object[] {firstColumn, allColumns};
}