Java Code Examples for org.apache.commons.io.LineIterator#nextLine()

The following examples show how to use org.apache.commons.io.LineIterator#nextLine(). Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
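LineIterator reads a file lazily, one line at a time, so even large files can be processed without loading them fully into memory. The recurring pattern in the examples below is to loop on hasNext()/nextLine() and close the iterator when done. Here is a minimal sketch of that pattern (assuming commons-io 2.6 or later, where LineIterator implements Closeable and so works with try-with-resources; the file name is illustrative):

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class LineIteratorSketch {
    public static void main(String[] args) throws IOException {
        File file = new File("example.txt"); // illustrative input file
        // try-with-resources closes the iterator and its underlying stream
        try (LineIterator it = FileUtils.lineIterator(file, "UTF-8")) {
            while (it.hasNext()) {
                String line = it.nextLine();
                System.out.println(line);
            }
        }
    }
}

Several of the examples below predate this and instead call LineIterator.closeQuietly(LineIterator) or close() in a finally block; closeQuietly has been deprecated since commons-io 2.6, but both approaches release the underlying stream.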
Example 1
Source File: Main.java    From hiped2 with Apache License 2.0
public static void createInputFile(Configuration conf, Path file, Path targetFile,
                                   String startNode)
    throws IOException {
  FileSystem fs = file.getFileSystem(conf);

  OutputStream os = fs.create(targetFile);
  LineIterator iter = org.apache.commons.io.IOUtils
      .lineIterator(fs.open(file), "UTF-8");
  while (iter.hasNext()) {
    String line = iter.nextLine();

    String[] parts = StringUtils.split(line);
    int distance = Node.INFINITE;
    if (startNode.equals(parts[0])) {
      distance = 0;
    }
    IOUtils.write(parts[0] + '\t' + String.valueOf(distance) + "\t\t",
        os);
    IOUtils.write(StringUtils.join(parts, '\t', 1, parts.length), os);
    IOUtils.write("\n", os);
  }

  iter.close();
  os.close();
}
 
Example 2
Source File: CratesPlus.java    From CratesPlus with GNU General Public License v3.0
public String uploadFile(String fileName) {
    File file = new File(getDataFolder(), fileName);
    if (!file.exists())
        return null;
    StringBuilder lines = new StringBuilder(); // avoid repeated String concatenation in the loop
    try {
        LineIterator it = FileUtils.lineIterator(file, "UTF-8");
        try {
            while (it.hasNext()) {
                lines.append(it.nextLine()).append('\n');
            }
        } finally {
            it.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return MCDebug.paste(fileName, lines.toString());
}
 
Example 3
Source File: FrequentSequenceMiner.java    From api-mining with GNU General Public License v3.0
/** Read in frequent sequences (sorted by support) */
public static SortedMap<Sequence, Integer> readFrequentSequences(final File output) throws IOException {
	final HashMap<Sequence, Integer> sequences = new HashMap<>();

	final LineIterator it = FileUtils.lineIterator(output);
	while (it.hasNext()) {
		final String line = it.nextLine();
		if (!line.trim().isEmpty()) {
			final String[] splitLine = line.split("#SUP:");
			final String[] items = splitLine[0].trim().split("-1");
			final Sequence seq = new Sequence();
			for (final String item : items)
				seq.add(Integer.parseInt(item.trim()));
			final int supp = Integer.parseInt(splitLine[1].trim());
			sequences.put(seq, supp);
		}
	}
	it.close();
	// Sort sequences by support
	final Ordering<Sequence> comparator = Ordering.natural().reverse().onResultOf(Functions.forMap(sequences))
			.compound(Ordering.usingToString());
	return ImmutableSortedMap.copyOf(sequences, comparator);
}
 
Example 4
Source File: PAMCore.java    From api-mining with GNU General Public License v3.0
public static TransactionList readTransactions(final File inputFile) throws IOException {

		final List<Transaction> transactions = new ArrayList<>();

		// for each line (transaction) until the end of file
		final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
		while (it.hasNext()) {

			final String line = it.nextLine();
			// skip the line if it is empty, a comment or other metadata
			if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
				continue;
			}

			// split the transaction into items
			final String[] lineSplit = line.split(" ");
			// convert to Transaction class and add it to the structure
			transactions.add(getTransaction(lineSplit));

		}
		// close the input file
		LineIterator.closeQuietly(it);

		return new TransactionList(transactions);
	}
 
Example 5
Source File: Corpus.java    From SONDY with GNU General Public License v3.0
public HashSet<String> getAuthors(Event event){
    HashSet<String> authors = new HashSet<>();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for(int i = timeSliceA; i <= timeSliceB; i++){
        try {
            File textFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".text");
            File authorFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".author");
            LineIterator textIter = FileUtils.lineIterator(textFile);
            LineIterator authorIter = FileUtils.lineIterator(authorFile);
            while(textIter.hasNext()){
                String text = textIter.nextLine();
                String author = authorIter.nextLine();
                if(text.contains(term)){
                    authors.add(author);
                }
            }
            LineIterator.closeQuietly(textIter);
            LineIterator.closeQuietly(authorIter);
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return authors;
}
 
Example 6
Source File: ChineseCharacterConverter.java    From modernmt with Apache License 2.0
private static Map<Integer, Integer> loadDictionary(String filename) {
    HashMap<Integer, Integer> result = new HashMap<>();

    InputStream stream = null;
    LineIterator iterator = null;

    try {
        stream = ChineseCharacterConverter.class.getResourceAsStream(filename);
        iterator = IOUtils.lineIterator(stream, "UTF-8");
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            String[] keyValues = line.split("\t", 2);
            Integer key = keyValues[0].codePointAt(0);
            Integer value = keyValues[1].codePointAt(0);
            result.put(key, value);
        }

        return result;
    } catch (IOException e) {
        throw new Error(e);
    } finally {
        IOUtils.closeQuietly(stream);
        if (iterator != null)
            iterator.close();
    }
}
 
Example 7
Source File: CountLinesTextFile.java    From levelup-java-examples with Apache License 2.0
@Test
public void count_lines_text_apache() throws IOException {

	LineIterator lineIterator = FileUtils.lineIterator(
			Paths.get(fileLocation).toFile(), Charset.defaultCharset()
					.toString());

	long linesInTextFile = 0;
	try {
		while (lineIterator.hasNext()) {
			linesInTextFile++;
			lineIterator.nextLine();
		}
	} finally {
		LineIterator.closeQuietly(lineIterator);
	}

	assertEquals(10, linesInTextFile);
}
 
Example 8
Source File: SensitiveWord.java    From maven-framework-project with MIT License
private static void _CheckReload(){
    if(wordfilter.lastModified() > lastModified){
        synchronized(SensitiveWord.class){
            try{
                lastModified = wordfilter.lastModified();
                LineIterator lines = FileUtils.lineIterator(wordfilter, "utf-8");
                while(lines.hasNext()){
                    String line = lines.nextLine();
                    if(StringUtils.isNotBlank(line))
                        words.add(StringUtils.trim(line).toLowerCase());
                }
                lines.close();
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
}
 
Example 9
Source File: StatisticalSequenceMining.java    From sequence-mining with GNU General Public License v3.0
/** Read in GoKrimp sequences (sorted by compression benefit) */
public static LinkedHashMap<Sequence, Double> readGoKrimpSequences(final File output) throws IOException {
	final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();

	final LineIterator it = FileUtils.lineIterator(output);
	while (it.hasNext()) {
		final String line = it.nextLine();
		if (!line.trim().isEmpty() && line.charAt(0) == '[') {
			final String[] splitLine = line.split(" ");
			final double worth = Double.parseDouble(splitLine[splitLine.length - 1]);
			final Sequence seq = new Sequence();
			for (int i = 1; i < splitLine.length - 2; i++)
				seq.add(Integer.parseInt(splitLine[i]));
			sequences.put(seq, worth);
		}
	}
	it.close();

	return sequences;
}
 
Example 10
Source File: StatisticalSequenceMining.java    From sequence-mining with GNU General Public License v3.0
/**
 * Read in GOKRIMP sequences (sorted by compression benefit)
 *
 * @deprecated gives slightly different results to reference implementation
 */
@Deprecated
public static LinkedHashMap<Sequence, Double> readGoKrimpSequencesSPMF(final File output) throws IOException {
	final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();

	final LineIterator it = FileUtils.lineIterator(output);
	while (it.hasNext()) {
		final String line = it.nextLine();
		if (!line.trim().isEmpty()) {
			final String[] splitLine = line.split("#SUP:");
			final String[] items = splitLine[0].trim().split(" ");
			final Sequence seq = new Sequence();
			for (final String item : items)
				seq.add(Integer.parseInt(item.trim()));
			final double compressionBenefit = Double.parseDouble(splitLine[1].trim());
			sequences.put(seq, compressionBenefit);
		}
	}
	it.close();

	return sequences;
}
 
Example 11
Source File: NumberOfLineFinder.java    From tutorials with MIT License
public static int getTotalNumberOfLinesUsingApacheCommonsIO(String fileName) {
    int lines = 0;
    try {
        LineIterator lineIterator = FileUtils.lineIterator(new File(fileName));
        while (lineIterator.hasNext()) {
            lineIterator.nextLine();
            lines++;
        }
        lineIterator.close();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return lines;
}
 
Example 12
Source File: ClientCommandRunner.java    From jenkins-client-plugin with Apache License 2.0
@Override
public Boolean call() throws IOException, InterruptedException {
    try (Reader reader = new InputStreamReader(in)) {
        LineIterator it = IOUtils.lineIterator(reader);
        while (it.hasNext()) {
            String line = it.nextLine();
            if (outputObserver.onReadLine(line)) {
                return true; // interrupted by OutputObserver
            }
        }
    }
    return false;
}
 
Example 13
Source File: SensitiveWordMonitor.java    From everyone-java-blog with Apache License 2.0
private static Set<String> loadBadWord(File file) {
    Set<String> badWordSet = new HashSet<>();
    try {
        LineIterator it = FileUtils.lineIterator(file);
        while(it.hasNext()) {
            String badWord = it.nextLine();
            badWordSet.add(badWord);
        }
        it.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    return badWordSet;
}
 
Example 14
Source File: PAM.java    From api-mining with GNU General Public License v3.0
private static void generateTransactionDatabase(final String arffFile, final BiMap<String, Integer> dictionary,
		final File transactionDB) throws IOException {

	int mID = 0;
	boolean found = false;
	final PrintWriter out = new PrintWriter(transactionDB);
	final LineIterator it = FileUtils.lineIterator(new File(arffFile));
	while (it.hasNext()) {
		final String line = it.nextLine();

		if (found) {
			for (final String rawCall : line.split(",")[1].replace("'", "").split(" ")) {
				final String call = rawCall.trim();
				if (call.isEmpty()) // skip empty strings
					continue;
				if (dictionary.containsKey(call)) {
					final int ID = dictionary.get(call);
					out.print(ID + " -1 ");
				} else {
					out.print(mID + " -1 ");
					dictionary.put(call, mID);
					mID++;
				}
			}
			out.println("-2");
		}

		if (line.contains("@data"))
			found = true;

	}
	it.close();
	out.close();
}
 
Example 15
Source File: FileGrepper.java    From orion.server with Eclipse Public License 1.0
/**
 * Searches the contents of a file
 * @param file The file to search
 * @return whether a match was found; false if the file could not be read or contains binary content
 */
private boolean searchFile(File file) {
	LineIterator lineIterator = null;
	try {
		lineIterator = FileUtils.lineIterator(file);
	} catch (IOException e) {
		logger.error("FileGrepper.searchFile: " + e.getLocalizedMessage());
		return false;
	}
	try {
		while (lineIterator.hasNext()) {
			String line = lineIterator.nextLine();
			if (line.contains("\0")) {
				// file contains binary content
				return false;
			}
			matcher.reset(line);
			if (matcher.find()) {
				return true;
			}
		}
	} finally {
		if (lineIterator != null)
			lineIterator.close();
	}
	return false;
}
 
Example 16
Source File: PAMCore.java    From api-mining with GNU General Public License v3.0
/**
 * This method scans the input database to calculate the support of single
 * items.
 *
 * @param inputFile
 *            the input file
 * @return a multiset for storing the support of each singleton
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
		throws IOException {

	final Multiset<Sequence> singletons = HashMultiset.create();

	// for each line (transaction) until the end of file
	final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
	while (it.hasNext()) {

		final String line = it.nextLine();
		// skip the line if it is empty, a comment or other metadata
		if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
			continue;
		}

		// split the line into items
		final String[] lineSplit = line.split(" ");
		// for each item
		final HashSet<Sequence> seenItems = new HashSet<>();
		for (final String itemString : lineSplit) {
			final int item = Integer.parseInt(itemString);
			if (item >= 0) { // ignore end of itemset/sequence tags
				final Sequence seq = new Sequence(item);
				PAMCore.recursiveSetOccurrence(seq, seenItems); // set occurrence
				seenItems.add(seq); // add item to seen
			}
		}
		singletons.addAll(seenItems); // increase the support of the items
	}

	// close the input file
	LineIterator.closeQuietly(it);

	return singletons;
}
 
Example 17
Source File: Corpus.java    From SONDY with GNU General Public License v3.0
public ObservableList<Message> getFilteredMessages(Event event, String[] words, int operator){
    ObservableList<Message> messages = FXCollections.observableArrayList();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for(int i = timeSliceA; i <= timeSliceB; i++){
        try {
            File textFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".text");
            File timeFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".time");
            File authorFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".author");
            LineIterator textIter = FileUtils.lineIterator(textFile);
            LineIterator timeIter = FileUtils.lineIterator(timeFile);
            LineIterator authorIter = FileUtils.lineIterator(authorFile);
            while(textIter.hasNext()){
                String text = textIter.nextLine();
                // advance the time and author iterators on every line so they
                // stay aligned with the text iterator
                String author = authorIter.nextLine();
                String time = timeIter.nextLine();
                short[] test = new short[words.length];
                for(int j = 0; j < words.length; j++){
                    if(StringUtils.containsIgnoreCase(text,words[j])){
                        test[j] = 1;
                    }else{
                        test[j] = 0;
                    }
                }
                if(StringUtils.containsIgnoreCase(text,term)){
                    int testSum = ArrayUtils.sum(test, 0, test.length-1);
                    if(operator==0 && testSum == test.length){
                        messages.add(new Message(author,time,text));
                    }
                    if(operator==1 && testSum > 0){
                        messages.add(new Message(author,time,text));
                    }
                }
            }
            LineIterator.closeQuietly(textIter);
            LineIterator.closeQuietly(timeIter);
            LineIterator.closeQuietly(authorIter);
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return messages;
}
 
Example 18
Source File: FastOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License
/**
 * @param fileName
 * @throws IOException
 */
@Override
public void loadLCSCache(String fileName) throws IOException {
	try {
		clearLCSCache();
	} catch (UnknownOWLClassException e) {
		e.printStackTrace();
		throw new IOException("Cannot clear cache");
	}
	LOG.info("Loading LCS cache from "+fileName);

	FileInputStream s = new FileInputStream(fileName);
	//List<String> lines = IOUtils.readLines(s);
	LineIterator itr = IOUtils.lineIterator(s, UTF_8);
	while (itr.hasNext()) {
		String line = itr.nextLine();
		String[] vals = line.split("\t");
		OWLClass c1 = getOWLClassFromShortId(vals[0]);
		OWLClass c2 = getOWLClassFromShortId(vals[1]);
		OWLClass a = getOWLClassFromShortId(vals[3]);
		Integer cix = classIndex.get(c1);
		Integer dix = classIndex.get(c2);
		Integer aix = classIndex.get(a);
		// skip lines that reference classes missing from the index,
		// otherwise the unboxing below throws a NullPointerException
		if (cix == null) {
			LOG.error("Unknown class C: "+c1);
			continue;
		}
		if (dix == null) {
			LOG.error("Unknown class D: "+c2);
			continue;
		}
		if (aix == null) {
			LOG.error("Unknown ancestor class: "+a);
			continue;
		}

		// Note that we only populate half the cache
		// Ensure cix < dix
		int temp;
		if (cix > dix) {
			// swap
			temp = cix;
			cix = dix;
			dix = temp;
		}

		ciPairIsCached[cix][dix] = true;
		//ciPairScaledScore[cix][dix] = (short)(Double.valueOf(vals[2]) * scaleFactor);
		// TODO - set all IC caches
		ciPairLCS[cix][dix] = aix;
	}
	s.close();
	LOG.info("Finished loading LCS cache from "+fileName);
	isLCSCacheFullyPopulated = true;
}
 
Example 19
Source File: WordVectorSerializer.java    From deeplearning4j with Apache License 2.0
/**
 * Loads an in-memory cache from the given input stream (sets syn0 and the vocab).
 *
 * @param inputStream  input stream
 * @return a {@link Pair} holding the lookup table and the vocab cache.
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(@NonNull InputStream inputStream) {
    AbstractCache<VocabWord> cache = new AbstractCache<>();
    LineIterator lines = null;

    try (InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
         BufferedReader reader = new BufferedReader(inputStreamReader)) {
        lines = IOUtils.lineIterator(reader);

        String line = null;
        boolean hasHeader = false;

        /* Check if first line is a header */
        if (lines.hasNext()) {
            line = lines.nextLine();
            hasHeader = isHeader(line, cache);
        }

        if (hasHeader) {
            log.debug("First line is a header");
            line = lines.nextLine();
        }

        List<INDArray> arrays = new ArrayList<>();
        long[] vShape = new long[]{ 1, -1 };

        // loop until the input is exhausted (line is null if there were no data lines)
        while (line != null) {
            String[] tokens = line.split(" ");
            String word = ReadHelper.decodeB64(tokens[0]);
            VocabWord vocabWord = new VocabWord(1.0, word);
            vocabWord.setIndex(cache.numWords());

            cache.addToken(vocabWord);
            cache.addWordToIndex(vocabWord.getIndex(), word);
            cache.putVocabWord(word);

            float[] vector = new float[tokens.length - 1];
            for (int i = 1; i < tokens.length; i++) {
                vector[i - 1] = Float.parseFloat(tokens[i]);
            }

            vShape[1] = vector.length;
            INDArray row = Nd4j.create(vector, vShape);

            arrays.add(row);

            line = lines.hasNext() ? lines.next() : null;
        }

        INDArray syn = Nd4j.vstack(arrays);

        InMemoryLookupTable<VocabWord> lookupTable = new InMemoryLookupTable
                .Builder<VocabWord>()
                .vectorLength(arrays.get(0).columns())
                .useAdaGrad(false)
                .cache(cache)
                .useHierarchicSoftmax(false)
                .build();

        lookupTable.setSyn0(syn);

        return new Pair<>((InMemoryLookupTable) lookupTable, (VocabCache) cache);
    } catch (IOException readTextStreamException) {
        throw new RuntimeException(readTextStreamException);
    } finally {
        if (lines != null) {
            lines.close();
        }
    }
}
 
Example 20
Source File: StatisticalSequenceMining.java    From sequence-mining with GNU General Public License v3.0
/** Convert dataset from SPMF format to GoKrimp format */
private static void convertDatasetGoKrimpFormat(final File inputDB, final File outputDB) throws IOException {

	// Output DB
	final BufferedWriter db = new BufferedWriter(new FileWriter(outputDB));

	// for each line (transaction) until the end of file
	boolean newSeq = false;
	final LineIterator it = FileUtils.lineIterator(inputDB, "UTF-8");
	while (it.hasNext()) {

		final String line = it.nextLine();
		// skip the line if it is empty, a comment or other metadata
		if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
			continue;
		}

		// sequence separator
		if (newSeq)
			db.write("\n");

		// split the transaction into items
		final String[] lineSplit = line.split(" ");

		for (int i = 0; i < lineSplit.length; i++) {
			if (lineSplit[i].equals("-1")) {
				// end of itemset marker: nothing to write
			} else if (lineSplit[i].equals("-2")) { // end of sequence
				newSeq = true;
			} else { // extract the value for an item
				db.write(lineSplit[i] + " ");
			}
		}

	}
	db.newLine();
	db.close();

	// close the input file
	LineIterator.closeQuietly(it);

}