Java Code Examples for org.apache.commons.csv.CSVRecord#iterator()

The following examples show how to use org.apache.commons.csv.CSVRecord#iterator(). You can go to the original project or source file by following the links above each example.
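Before the project examples, here is a minimal, self-contained sketch (the class name and sample data below are invented for illustration). It parses an in-memory CSV string and walks each record's values with CSVRecord#iterator(), which works because CSVRecord implements Iterable<String>:

import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CsvRecordIteratorDemo {

    public static void main(String[] args) throws IOException {
        String csv = "alpha, beta ,gamma\n1,2,3";
        // try-with-resources closes the parser together with the underlying reader
        try (CSVParser parser = new CSVParser(new StringReader(csv), CSVFormat.DEFAULT)) {
            for (CSVRecord record : parser) {
                // CSVRecord implements Iterable<String>, so iterator() walks the
                // column values of this row in order
                Iterator<String> values = record.iterator();
                StringBuilder row = new StringBuilder();
                while (values.hasNext()) {
                    row.append('[').append(values.next().trim()).append(']');
                }
                System.out.println(row);
            }
        }
    }
}

In most code an enhanced for loop over the record is sufficient; the explicit iterator is mainly useful when hasNext() checks have to be interleaved with other bookkeeping, such as filling a fixed-size array, which is what several of the examples below do.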
Example 1
Source File: FrameworkUtils.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License    6 votes
/**
 * Splits a comma-separated string into exactly {@code len} trimmed values,
 * falling back to a plain {@code String#split} if the CSV record holds more
 * fields than {@code len}.
 */
public static String[] splitStr(String val, Integer len) throws IOException {
    
    String[] input;
    
    try {
        CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT);
        CSVRecord record = parser.getRecords().get(0);
        input = new String[len];
        Iterator<String> valuesIt = record.iterator();
        int i = 0;
        while (valuesIt.hasNext()) {
            input[i] = valuesIt.next().trim();
            i++;
        }
        parser.close();
    } catch (ArrayIndexOutOfBoundsException e) {
        input = val.split(",", len);
        for (int i = 0; i < input.length; i++)
            input[i] = input[i].trim();
    }
    
    return input;
}
 
Example 2
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0    5 votes
/** Converts a CSV row into a DLP Table.Row. */
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example 3
Source File: CsvReader.java    From jstarcraft-core with Apache License 2.0    5 votes
public CsvReader(InputStream inputStream, CodecDefinition definition) {
    super(definition);
    InputStreamReader buffer = new InputStreamReader(inputStream, StringUtility.CHARSET);
    try (CSVParser input = new CSVParser(buffer, FORMAT)) {
        Iterator<CSVRecord> iterator = input.iterator();
        if (iterator.hasNext()) {
            // The first record's values are already materialized as strings,
            // so this iterator remains usable after the parser is closed.
            CSVRecord values = iterator.next();
            this.inputStream = values.iterator();
        }
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
 
Example 4
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0    5 votes
/** Converts a CSV row into a DLP Table.Row. */
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example 5
Source File: DelimitedTextReader.java    From marklogic-contentpump with Apache License 2.0    5 votes
protected String[] getLine(CSVRecord record)
        throws IOException {
    Iterator<String> recordIterator = record.iterator();
    int recordSize = record.size();
    String[] values = new String[recordSize];
    for (int i = 0; i < recordSize; i++) {
        if (recordIterator.hasNext()) {
            values[i] = recordIterator.next();
        } else {
            throw new IOException("Record size doesn't match the real size");
        }
    }
    return values;
}
 
Example 6
Source File: CommandLineInterfaceTest.java    From utah-parser with Apache License 2.0    5 votes
private void assertCsvContent(String content, List<List<String>> expected) throws IOException {
    CSVParser csv = CSVParser.parse(content, CSVFormat.DEFAULT);

    List<List<String>> observed = new ArrayList<List<String>>();
    for (CSVRecord line : csv.getRecords()) {
        List<String> lineValues = new ArrayList<>();
        for (Iterator<String> seeker = line.iterator(); seeker.hasNext(); ) {
            lineValues.add(seeker.next());
        }
        observed.add(lineValues);
    }
    assertEquals(expected, observed);
}
 
Example 7
Source File: FrameworkUtils.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License    5 votes
public static String[] splitStr(String val) throws IOException {
    
    CSVParser parser = new CSVParser(new StringReader(val), CSVFormat.DEFAULT);
    CSVRecord record = parser.getRecords().get(0);
    Iterator<String> valuesIt = record.iterator();
    String[] input = new String[record.size()];
    int i = 0;
    while (valuesIt.hasNext()) {
        input[i] = valuesIt.next();
        i++;
    }
    parser.close();
    return input;
}
 
Example 8
Source File: DelimitedTextInputFormat.java    From marklogic-contentpump with Apache License 2.0    4 votes
public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:"
            + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit)file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        
        if (fsplit.getStart() == 0) {
            // parse this input split to extract the header record
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
                ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter
                    + ". Expects single character.");
            }
            String encoding = conf.get(
                MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                CSVParserFormatter.getFormat(delimiter,
                    DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }
                
                EncodingUtil.handleBOMUTF8(header, 0);
                
                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }
        
        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
            hlist.toArray(new Text[hlist.size()])), path,
            fsplit.getStart(), fsplit.getLength(),
            fsplit.getLocations());
        populatedSplits.add(ds);
    }
    
    return populatedSplits;
}
 
Example 9
Source File: DefaultConverter.java    From swagger-inflector with Apache License 2.0    4 votes
public Object cast(List<String> arguments, Parameter parameter, JavaType javaType, Map<String, Schema> definitions) throws ConversionException {
    if (arguments == null || arguments.size() == 0) {
        return null;
    }
    Class<?> cls = javaType.getRawClass();

    LOGGER.debug("converting array `" + arguments + "` to `" + cls + "`");
    if (javaType.isArrayType()) {
        if (parameter.getSchema() != null) {
            List<Object> output = new ArrayList<>();
            if (parameter.getSchema() instanceof ArraySchema) {
                ArraySchema arraySchema = (ArraySchema) parameter.getSchema();
                if (arraySchema.getItems() != null) {
                    Schema inner = arraySchema.getItems();

                    // TODO: this does not need to be done this way, update the helper method
                    Parameter innerParam = new QueryParameter().schema(inner);
                    JavaType innerClass = getTypeFromParameter(innerParam, definitions);
                    for (String obj : arguments) {
                        String[] parts = new String[0];
                        CSVFormat format = null;
                        if (Parameter.StyleEnum.FORM.equals(parameter.getStyle()) && !StringUtils.isEmpty(obj) && parameter.getExplode() == false) {
                            format = CSVFormat.DEFAULT;
                        } else if (Parameter.StyleEnum.PIPEDELIMITED.equals(parameter.getStyle()) && !StringUtils.isEmpty(obj)) {
                            format = CSVFormat.newFormat('|').withQuote('"');
                        } else if (Parameter.StyleEnum.SPACEDELIMITED.equals(parameter.getStyle()) && !StringUtils.isEmpty(obj)) {
                            format = CSVFormat.newFormat(' ').withQuote('"');
                        }
                        if (format != null) {
                            try {
                                for (CSVRecord record : CSVParser.parse(obj, format).getRecords()) {
                                    List<String> it = new ArrayList<String>();
                                    for (Iterator<String> x = record.iterator(); x.hasNext(); ) {
                                        it.add(x.next());
                                    }
                                    parts = it.toArray(new String[it.size()]);
                                }
                            } catch (IOException e) {
                                // on a parse failure, parts stays empty, so this value contributes nothing
                            }
                        } else {
                            parts = new String[1];
                            parts[0] = obj;
                        }
                        for (String p : parts) {
                            Object ob = cast(p, inner, innerClass);
                            if (ob != null) {
                                output.add(ob);
                            }
                        }

                    }

                    return output;
                }
            }
        }
    } else if (parameter != null) {
        return cast(arguments.get(0), parameter.getSchema(), javaType);
    }
    return null;
}
 
Example 10
Source File: DataFlowFromCsvMain.java    From Decision with Apache License 2.0    4 votes
public static void main(String[] args) throws IOException, NumberFormatException, InterruptedException {
    if (args.length < 4) {
        log.info("Usage: \n param 1 - path to file \n param 2 - stream name to send the data \n param 3 - time in ms to wait to send each data \n param 4 - broker list");
    } else {
        Producer<String, String> producer = new Producer<String, String>(createProducerConfig(args[3]));
        Gson gson = new Gson();

        Reader in = new FileReader(args[0]);
        CSVParser parser = CSVFormat.DEFAULT.parse(in);

        List<String> columnNames = new ArrayList<>();
        for (CSVRecord csvRecord : parser.getRecords()) {

            if (columnNames.size() == 0) {
                Iterator<String> iterator = csvRecord.iterator();
                while (iterator.hasNext()) {
                    columnNames.add(iterator.next());
                }
            } else {
                StratioStreamingMessage message = new StratioStreamingMessage();

                message.setOperation(STREAM_OPERATIONS.MANIPULATION.INSERT.toLowerCase());
                message.setStreamName(args[1]);
                message.setTimestamp(System.currentTimeMillis());
                message.setSession_id(String.valueOf(System.currentTimeMillis()));
                message.setRequest_id(String.valueOf(System.currentTimeMillis()));
                message.setRequest("dummy request");

                List<ColumnNameTypeValue> sensorData = new ArrayList<>();
                for (int i = 0; i < columnNames.size(); i++) {

                    // Workaround: try to parse the value as a number, otherwise keep the raw string
                    Object value = null;
                    try {
                        value = Double.valueOf(csvRecord.get(i));
                    } catch (NumberFormatException e) {
                        value = csvRecord.get(i);
                    }
                    sensorData.add(new ColumnNameTypeValue(columnNames.get(i), null, value));
                }

                message.setColumns(sensorData);

                String json = gson.toJson(message);
                log.info("Sending data: {}", json);
                producer.send(new KeyedMessage<String, String>(InternalTopic.TOPIC_DATA.getTopicName(),
                        STREAM_OPERATIONS.MANIPULATION.INSERT, json));

                log.info("Sleeping {} ms...", args[2]);
                Thread.sleep(Long.valueOf(args[2]));
            }
        }
        log.info("Program completed.");
    }
}