Java Code Examples for org.apache.commons.io.LineIterator#nextLine()
The following examples show how to use org.apache.commons.io.LineIterator#nextLine().
Each example is taken from an open-source project; the source file, project, and license are noted above it.
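Before the project-specific examples, a minimal sketch of the canonical pattern may help: open an iterator with FileUtils.lineIterator, walk the file with hasNext()/nextLine(), and close the iterator in a finally block so the underlying Reader is released. The file name example.txt below is a hypothetical placeholder.

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class LineIteratorBasics {
    public static void main(String[] args) throws IOException {
        // "example.txt" is a placeholder path for this sketch
        LineIterator it = FileUtils.lineIterator(new File("example.txt"), "UTF-8");
        try {
            while (it.hasNext()) {
                // nextLine() returns the next line without its line terminator
                String line = it.nextLine();
                System.out.println(line);
            }
        } finally {
            it.close(); // always close: the iterator holds an open Reader on the file
        }
    }
}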
Example 1
Source File: Main.java From hiped2 with Apache License 2.0
public static void createInputFile(Configuration conf, Path file, Path targetFile, String startNode)
        throws IOException {
    FileSystem fs = file.getFileSystem(conf);

    OutputStream os = fs.create(targetFile);
    LineIterator iter = org.apache.commons.io.IOUtils
            .lineIterator(fs.open(file), "UTF8");
    while (iter.hasNext()) {
        String line = iter.nextLine();

        String[] parts = StringUtils.split(line);
        int distance = Node.INFINITE;
        if (startNode.equals(parts[0])) {
            distance = 0;
        }
        IOUtils.write(parts[0] + '\t' + String.valueOf(distance) + "\t\t", os);
        IOUtils.write(StringUtils.join(parts, '\t', 1, parts.length), os);
        IOUtils.write("\n", os);
    }
    os.close();
}
Example 2
Source File: CratesPlus.java From CratesPlus with GNU General Public License v3.0
public String uploadFile(String fileName) {
    File file = new File(getDataFolder(), fileName);
    if (!file.exists())
        return null;
    LineIterator it;
    String lines = "";
    try {
        it = FileUtils.lineIterator(file, "UTF-8");
        try {
            while (it.hasNext()) {
                String line = it.nextLine();
                lines += line + "\n";
            }
        } finally {
            it.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return MCDebug.paste(fileName, lines);
}
Example 3
Source File: FrequentSequenceMiner.java From api-mining with GNU General Public License v3.0
/** Read in frequent sequences (sorted by support) */
public static SortedMap<Sequence, Integer> readFrequentSequences(final File output) throws IOException {
    final HashMap<Sequence, Integer> sequences = new HashMap<>();

    final LineIterator it = FileUtils.lineIterator(output);
    while (it.hasNext()) {
        final String line = it.nextLine();
        if (!line.trim().isEmpty()) {
            final String[] splitLine = line.split("#SUP:");
            final String[] items = splitLine[0].trim().split("-1");
            final Sequence seq = new Sequence();
            for (final String item : items)
                seq.add(Integer.parseInt(item.trim()));
            final int supp = Integer.parseInt(splitLine[1].trim());
            sequences.put(seq, supp);
        }
    }

    // Sort sequences by support
    final Ordering<Sequence> comparator = Ordering.natural().reverse().onResultOf(Functions.forMap(sequences))
            .compound(Ordering.usingToString());
    return ImmutableSortedMap.copyOf(sequences, comparator);
}
Example 4
Source File: PAMCore.java From api-mining with GNU General Public License v3.0
public static TransactionList readTransactions(final File inputFile) throws IOException {

    final List<Transaction> transactions = new ArrayList<>();

    // for each line (transaction) until the end of file
    final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
    while (it.hasNext()) {
        final String line = it.nextLine();
        // if the line is a comment, is empty or is a
        // kind of metadata
        if (line.isEmpty() == true || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
            continue;
        }

        // split the transaction into items
        final String[] lineSplited = line.split(" ");
        // convert to Transaction class and add it to the structure
        transactions.add(getTransaction(lineSplited));
    }
    // close the input file
    LineIterator.closeQuietly(it);

    return new TransactionList(transactions);
}
Example 5
Source File: Corpus.java From SONDY with GNU General Public License v3.0
public HashSet<String> getAuthors(Event event){
    HashSet<String> authors = new HashSet<>();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for(int i = timeSliceA; i <= timeSliceB; i++){
        try {
            File textFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".text");
            File authorFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".author");
            LineIterator textIter = FileUtils.lineIterator(textFile);
            LineIterator authorIter = FileUtils.lineIterator(authorFile);
            while(textIter.hasNext()){
                String text = textIter.nextLine();
                String author = authorIter.nextLine();
                if(text.contains(term)){
                    authors.add(author);
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return authors;
}
Example 6
Source File: ChineseCharacterConverter.java From modernmt with Apache License 2.0
private static Map<Integer, Integer> loadDictionary(String filename) {
    HashMap<Integer, Integer> result = new HashMap<>();

    InputStream stream = null;
    LineIterator iterator = null;
    try {
        stream = ChineseCharacterConverter.class.getResourceAsStream(filename);
        iterator = IOUtils.lineIterator(stream, "UTF-8");
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            String[] keyValues = line.split("\t", 2);
            Integer key = keyValues[0].codePointAt(0);
            Integer value = keyValues[1].codePointAt(0);
            result.put(key, value);
        }
        return result;
    } catch (IOException e) {
        throw new Error(e);
    } finally {
        IOUtils.closeQuietly(stream);
        if (iterator != null)
            iterator.close();
    }
}
Example 7
Source File: CountLinesTextFile.java From levelup-java-examples with Apache License 2.0
@Test
public void count_lines_text_apache() throws IOException {

    LineIterator lineIterator = FileUtils.lineIterator(
            Paths.get(fileLocation).toFile(),
            Charset.defaultCharset().toString());

    long linesInTextFile = 0;
    try {
        while (lineIterator.hasNext()) {
            linesInTextFile++;
            lineIterator.nextLine();
        }
    } finally {
        LineIterator.closeQuietly(lineIterator);
    }
    assertEquals(10, linesInTextFile);
}
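A side note on cleanup: LineIterator.closeQuietly(LineIterator), used above and in several other examples here, is deprecated in recent Commons IO releases (2.6 and later), where LineIterator implements Closeable. A minimal sketch of the same counting logic with try-with-resources, assuming Commons IO 2.6+ and a hypothetical helper class:

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class CountLinesModern {
    // Counts lines using try-with-resources instead of closeQuietly;
    // requires a Commons IO version where LineIterator implements Closeable.
    static long countLines(File file) throws IOException {
        long lines = 0;
        try (LineIterator it = FileUtils.lineIterator(file, "UTF-8")) {
            while (it.hasNext()) {
                it.nextLine();
                lines++;
            }
        }
        return lines;
    }
}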
Example 8
Source File: SensitiveWord.java From maven-framework-project with MIT License
private static void _CheckReload(){
    if(wordfilter.lastModified() > lastModified){
        synchronized(SensitiveWord.class){
            try{
                lastModified = wordfilter.lastModified();
                LineIterator lines = FileUtils.lineIterator(wordfilter, "utf-8");
                while(lines.hasNext()){
                    String line = lines.nextLine();
                    if(StringUtils.isNotBlank(line))
                        words.add(StringUtils.trim(line).toLowerCase());
                }
            }catch(IOException e){
                e.printStackTrace();
            }
        }
    }
}
Example 9
Source File: StatisticalSequenceMining.java From sequence-mining with GNU General Public License v3.0
/** Read in GoKrimp sequences (sorted by compression benefit) */
public static LinkedHashMap<Sequence, Double> readGoKrimpSequences(final File output) throws IOException {
    final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();

    final LineIterator it = FileUtils.lineIterator(output);
    while (it.hasNext()) {
        final String line = it.nextLine();
        if (!line.trim().isEmpty() && line.charAt(0) == '[') {
            final String[] splitLine = line.split(" ");
            final double worth = Double.parseDouble(splitLine[splitLine.length - 1]);
            final Sequence seq = new Sequence();
            for (int i = 1; i < splitLine.length - 2; i++)
                seq.add(Integer.parseInt(splitLine[i]));
            sequences.put(seq, worth);
        }
    }

    return sequences;
}
Example 10
Source File: StatisticalSequenceMining.java From sequence-mining with GNU General Public License v3.0
/**
 * Read in GOKRIMP sequences (sorted by compression benefit)
 *
 * @deprecated gives slightly different results to reference implementation
 */
@Deprecated
public static LinkedHashMap<Sequence, Double> readGoKrimpSequencesSPMF(final File output) throws IOException {
    final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();

    final LineIterator it = FileUtils.lineIterator(output);
    while (it.hasNext()) {
        final String line = it.nextLine();
        if (!line.trim().isEmpty()) {
            final String[] splitLine = line.split("#SUP:");
            final String[] items = splitLine[0].trim().split(" ");
            final Sequence seq = new Sequence();
            for (final String item : items)
                seq.add(Integer.parseInt(item.trim()));
            final double compressionBenefit = Double.parseDouble(splitLine[1].trim());
            sequences.put(seq, compressionBenefit);
        }
    }

    return sequences;
}
Example 11
Source File: NumberOfLineFinder.java From tutorials with MIT License
public static int getTotalNumberOfLinesUsingApacheCommonsIO(String fileName) {
    int lines = 0;
    try {
        LineIterator lineIterator = FileUtils.lineIterator(new File(fileName));
        while (lineIterator.hasNext()) {
            lineIterator.nextLine();
            lines++;
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return lines;
}
Example 12
Source File: ClientCommandRunner.java From jenkins-client-plugin with Apache License 2.0
@Override
public Boolean call() throws IOException, InterruptedException {
    try (Reader reader = new InputStreamReader(in)) {
        LineIterator it = IOUtils.lineIterator(reader);
        while (it.hasNext()) {
            String line = it.nextLine();
            if (outputObserver.onReadLine(line)) {
                return true; // interrupted by OutputObserver
            }
        }
    }
    return false;
}
Example 13
Source File: SensitiveWordMonitor.java From everyone-java-blog with Apache License 2.0
private static Set<String> loadBadWord(File file) {
    Set<String> badWordSet = new HashSet<>();
    try {
        LineIterator it = FileUtils.lineIterator(file);
        while(it.hasNext()) {
            String badWord = it.nextLine();
            badWordSet.add(badWord);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return badWordSet;
}
Example 14
Source File: PAM.java From api-mining with GNU General Public License v3.0
private static void generateTransactionDatabase(final String arffFile, final BiMap<String, Integer> dictionary,
        final File transactionDB) throws IOException {

    int mID = 0;
    boolean found = false;
    final PrintWriter out = new PrintWriter(transactionDB);
    final LineIterator it = FileUtils.lineIterator(new File(arffFile));
    while (it.hasNext()) {
        final String line = it.nextLine();

        if (found) {
            for (final String raw_call : line.split(",")[1].replace("\'", "").split(" ")) {
                final String call = raw_call.trim();
                if (call.isEmpty()) // skip empty strings
                    continue;
                if (dictionary.containsKey(call)) {
                    final int ID = dictionary.get(call);
                    out.print(ID + " -1 ");
                } else {
                    out.print(mID + " -1 ");
                    dictionary.put(call, mID);
                    mID++;
                }
            }
            out.println("-2");
        }

        if (line.contains("@data"))
            found = true;
    }
    it.close();
    out.close();
}
Example 15
Source File: FileGrepper.java From orion.server with Eclipse Public License 1.0
/**
 * Searches the contents of a file
 * @param file The file to search
 * @return returns whether the search was successful
 * @throws IOException thrown if there is an error reading the file
 */
private boolean searchFile(File file) {
    LineIterator lineIterator = null;
    try {
        lineIterator = FileUtils.lineIterator(file);
    } catch (IOException e) {
        logger.error("FileGrepper.searchFile: " + e.getLocalizedMessage());
        return false;
    }
    try {
        while (lineIterator.hasNext()) {
            String line = lineIterator.nextLine();
            if (line.contains("\0")) {
                // file contains binary content
                return false;
            }
            matcher.reset(line);
            if (matcher.find()) {
                return true;
            }
        }
    } finally {
        if (lineIterator != null)
            lineIterator.close();
    }
    return false;
}
Example 16
Source File: PAMCore.java From api-mining with GNU General Public License v3.0
/**
 * This method scans the input database to calculate the support of single
 * items.
 *
 * @param inputFile
 *            the input file
 * @return a multiset for storing the support of each singleton
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
        throws IOException {

    final Multiset<Sequence> singletons = HashMultiset.create();

    // for each line (transaction) until the end of file
    final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
    while (it.hasNext()) {
        final String line = it.nextLine();
        // if the line is a comment, is empty or is a
        // kind of metadata
        if (line.isEmpty() == true || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
            continue;
        }

        // split the line into items
        final String[] lineSplit = line.split(" ");
        // for each item
        final HashSet<Sequence> seenItems = new HashSet<>();
        for (final String itemString : lineSplit) {
            final int item = Integer.parseInt(itemString);
            if (item >= 0) { // ignore end of itemset/sequence tags
                final Sequence seq = new Sequence(item);
                PAMCore.recursiveSetOccurrence(seq, seenItems); // set occurrence
                seenItems.add(seq); // add item to seen
            }
        }
        singletons.addAll(seenItems); // increase the support of the items
    }
    // close the input file
    LineIterator.closeQuietly(it);

    return singletons;
}
Example 17
Source File: Corpus.java From SONDY with GNU General Public License v3.0
public ObservableList<Message> getFilteredMessages(Event event, String[] words, int operator){
    ObservableList<Message> messages = FXCollections.observableArrayList();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for(int i = timeSliceA; i <= timeSliceB; i++){
        try {
            File textFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".text");
            File timeFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".time");
            File authorFile = new File(path+File.separator+preprocessing+File.separator+formatter.format(i)+".author");
            LineIterator textIter = FileUtils.lineIterator(textFile);
            LineIterator timeIter = FileUtils.lineIterator(timeFile);
            LineIterator authorIter = FileUtils.lineIterator(authorFile);
            while(textIter.hasNext()){
                String text = textIter.nextLine();
                short[] test = new short[words.length];
                for(int j = 0; j < words.length; j++){
                    if(StringUtils.containsIgnoreCase(text,words[j])){
                        test[j] = 1;
                    }else{
                        test[j] = 0;
                    }
                }
                if(StringUtils.containsIgnoreCase(text,term)){
                    int testSum = ArrayUtils.sum(test, 0, test.length-1);
                    String author = authorIter.nextLine();
                    String time = timeIter.nextLine();
                    if(operator==0 && testSum == test.length){
                        messages.add(new Message(author,time,text));
                    }
                    if(operator==1 && testSum > 0){
                        messages.add(new Message(author,time,text));
                    }
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return messages;
}
Example 18
Source File: FastOwlSim.java From owltools with BSD 3-Clause "New" or "Revised" License
/**
 * @param fileName
 * @throws IOException
 */
@Override
public void loadLCSCache(String fileName) throws IOException {
    try {
        clearLCSCache();
    } catch (UnknownOWLClassException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        throw new IOException("Cannot clear cache");
    }
    LOG.info("Loading LCS cache from "+fileName);

    FileInputStream s = new FileInputStream(fileName);
    //List<String> lines = IOUtils.readLines(s);
    LineIterator itr = IOUtils.lineIterator(s, UTF_8);
    while (itr.hasNext()) {
        String line = itr.nextLine();
        String[] vals = line.split("\t");
        OWLClass c1 = getOWLClassFromShortId(vals[0]);
        OWLClass c2 = getOWLClassFromShortId(vals[1]);
        OWLClass a = getOWLClassFromShortId(vals[3]);
        Integer cix = classIndex.get(c1);
        Integer dix = classIndex.get(c2);
        Integer aix = classIndex.get(a);
        if (cix == null) {
            LOG.error("Unknown class C: "+c1);
        }
        if (dix == null) {
            LOG.error("Unknown class D: "+c2);
        }
        if (aix == null) {
            LOG.error("Unknown ancestor class: "+a);
        }
        // Note that we only populate half the cache
        // Ensure cix < dix
        int temp;
        if (cix > dix) {
            // swap
            temp = cix;
            cix = dix;
            dix = temp;
        }
        ciPairIsCached[cix][dix] = true;
        //ciPairScaledScore[cix][dix] = (short)(Double.valueOf(vals[2]) * scaleFactor);
        // TODO - set all IC caches
        ciPairLCS[cix][dix] = aix;
    }
    s.close();
    LOG.info("Finished loading LCS cache from "+fileName);
    isLCSCacheFullyPopulated = true;
}
Example 19
Source File: WordVectorSerializer.java From deeplearning4j with Apache License 2.0
/**
 * Loads an in memory cache from the given input stream (sets syn0 and the vocab).
 *
 * @param inputStream input stream
 * @return a {@link Pair} holding the lookup table and the vocab cache.
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(@NonNull InputStream inputStream) {
    AbstractCache<VocabWord> cache = new AbstractCache<>();
    LineIterator lines = null;

    try (InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
         BufferedReader reader = new BufferedReader(inputStreamReader)) {
        lines = IOUtils.lineIterator(reader);

        String line = null;
        boolean hasHeader = false;

        /* Check if first line is a header */
        if (lines.hasNext()) {
            line = lines.nextLine();
            hasHeader = isHeader(line, cache);
        }

        if (hasHeader) {
            log.debug("First line is a header");
            line = lines.nextLine();
        }

        List<INDArray> arrays = new ArrayList<>();
        long[] vShape = new long[]{ 1, -1 };

        do {
            String[] tokens = line.split(" ");
            String word = ReadHelper.decodeB64(tokens[0]);
            VocabWord vocabWord = new VocabWord(1.0, word);
            vocabWord.setIndex(cache.numWords());

            cache.addToken(vocabWord);
            cache.addWordToIndex(vocabWord.getIndex(), word);
            cache.putVocabWord(word);

            float[] vector = new float[tokens.length - 1];
            for (int i = 1; i < tokens.length; i++) {
                vector[i - 1] = Float.parseFloat(tokens[i]);
            }

            vShape[1] = vector.length;
            INDArray row = Nd4j.create(vector, vShape);

            arrays.add(row);

            line = lines.hasNext() ? lines.next() : null;
        } while (line != null);

        INDArray syn = Nd4j.vstack(arrays);

        InMemoryLookupTable<VocabWord> lookupTable = new InMemoryLookupTable
                .Builder<VocabWord>()
                .vectorLength(arrays.get(0).columns())
                .useAdaGrad(false)
                .cache(cache)
                .useHierarchicSoftmax(false)
                .build();

        lookupTable.setSyn0(syn);

        return new Pair<>((InMemoryLookupTable) lookupTable, (VocabCache) cache);
    } catch (IOException readeTextStreamException) {
        throw new RuntimeException(readeTextStreamException);
    } finally {
        if (lines != null) {
            lines.close();
        }
    }
}
Example 20
Source File: StatisticalSequenceMining.java From sequence-mining with GNU General Public License v3.0
/** Convert dataset from SPMF format to GoKrimp format */
private static void convertDatasetGoKrimpFormat(final File inputDB, final File outputDB) throws IOException {

    // Output DB
    final BufferedWriter db = new BufferedWriter(new FileWriter(outputDB));

    // for each line (transaction) until the end of file
    boolean newSeq = false;
    final LineIterator it = FileUtils.lineIterator(inputDB, "UTF-8");
    while (it.hasNext()) {
        final String line = it.nextLine();
        // if the line is a comment, is empty or is a
        // kind of metadata
        if (line.isEmpty() == true || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
            continue;
        }

        // sequence separator
        if (newSeq)
            db.write("\n");

        // split the transaction into items
        final String[] lineSplited = line.split(" ");

        for (int i = 0; i < lineSplited.length; i++) {
            if (lineSplited[i].equals("-1")) {
                // end of item
            } else if (lineSplited[i].equals("-2")) {
                // end of sequence
                newSeq = true;
            } else {
                // extract the value for an item
                db.write(lineSplited[i] + " ");
            }
        }
    }
    db.newLine();
    db.close();

    // close the input file
    LineIterator.closeQuietly(it);
}