codemining.languagetools.TokenizerUtils Java Examples

The following examples show how to use codemining.languagetools.TokenizerUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TokenCounter.java — from api-mining (GNU General Public License v3.0), 5 votes
/**
 * Command-line entry point: adds up the token counts of the files listed
 * under a code directory and prints the total to standard output.
 *
 * @param args
 *            exactly two entries: the code directory, then the tokenizer
 *            class name handed to TokenizerUtils.tokenizerForClass;
 *            any other count prints a usage line to standard error
 * @throws IOException
 *             declared by the signature; an IOException raised while
 *             handling an individual file is caught and logged instead
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public static void main(final String[] args) throws IOException,
		InstantiationException, IllegalAccessException,
		ClassNotFoundException {
	final boolean hasExpectedArgs = args.length == 2;
	if (!hasExpectedArgs) {
		System.err.println("Usage <codeDir> <TokenizerClass>");
		return;
	}

	final String codeDirPath = args[0];
	final String tokenizerClassName = args[1];
	final ITokenizer tokenizer = TokenizerUtils
			.tokenizerForClass(tokenizerClassName);

	long total = 0;
	for (final File sourceFile : FileUtils.listFiles(new File(codeDirPath),
			tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) {
		try {
			final String contents = FileUtils.readFileToString(sourceFile);
			final char[] chars = contents.toCharArray();
			// Two entries per file are discounted: the sentence start and
			// sentence end tokens.
			total += tokenizer.tokenListFromCode(chars).size() - 2;
		} catch (final IOException e) {
			final String trace = ExceptionUtils.getFullStackTrace(e);
			LOGGER.warning(trace);
		}
	}

	System.out.println("Tokens: " + total);
}
 
Example #2
Source File: TokenCounter.java — from tassal (BSD 3-Clause "New" or "Revised" License), 5 votes
/**
 * Prints the total number of tokens found in the files listed under a code
 * directory, using the tokenizer class named on the command line.
 *
 * @param args
 *            the code directory followed by the tokenizer class name; if
 *            the count is not exactly two, a usage line is written to
 *            standard error and nothing else happens
 * @throws IOException
 *             declared by the signature; per-file IOExceptions are logged
 *             as warnings and do not abort the run
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public static void main(final String[] args) throws IOException,
		InstantiationException, IllegalAccessException,
		ClassNotFoundException {
	if (args.length != 2) {
		System.err.println("Usage <codeDir> <TokenizerClass>");
		return;
	}

	final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]);
	final File root = new File(args[0]);

	long runningTotal = 0;
	for (final File candidate : FileUtils.listFiles(root,
			tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) {
		try {
			runningTotal += tokensExcludingSentenceMarkers(tokenizer,
					candidate);
		} catch (final IOException e) {
			LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
		}
	}

	System.out.println("Tokens: " + runningTotal);
}

/**
 * Reads one file and returns its token-list size minus two, discounting the
 * sentence start and sentence end entries.
 */
private static long tokensExcludingSentenceMarkers(
		final ITokenizer tokenizer, final File source) throws IOException {
	final char[] code = FileUtils.readFileToString(source).toCharArray();
	return tokenizer.tokenListFromCode(code).size() - 2;
}
 
Example #3
Source File: TokenCounter.java — from codemining-core (BSD 3-Clause "New" or "Revised" License), 5 votes
/**
 * Tallies tokens across the files listed under a code directory and writes
 * the tally to standard output as "Tokens: N".
 *
 * @param args
 *            two entries are required: the code directory and the name of
 *            the tokenizer class; otherwise only a usage line is printed
 *            to standard error
 * @throws IOException
 *             declared by the signature; an IOException hit while handling
 *             a single file is logged and that file is skipped
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public static void main(final String[] args) throws IOException,
		InstantiationException, IllegalAccessException,
		ClassNotFoundException {
	if (args.length == 2) {
		final ITokenizer tokenizer = TokenizerUtils
				.tokenizerForClass(args[1]);

		long tally = 0;
		for (final File current : FileUtils.listFiles(new File(args[0]),
				tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) {
			try {
				final String text = FileUtils.readFileToString(current);
				// Subtract the sentence start and sentence end entries.
				tally = tally
						+ (tokenizer.tokenListFromCode(text.toCharArray())
								.size() - 2);
			} catch (final IOException e) {
				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
			}
		}

		System.out.println("Tokens: " + tally);
	} else {
		System.err.println("Usage <codeDir> <TokenizerClass>");
	}
}
 
Example #4
Source File: DistinctTokenCount.java — from api-mining (GNU General Public License v3.0), 4 votes
/**
 * Initializes the tokenizer field from the given tokenizer class name.
 *
 * @param tokenizerClass
 *            name passed to TokenizerUtils.tokenizerForClass
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public DistinctTokenCount(final String tokenizerClass)
		throws ClassNotFoundException, IllegalAccessException,
		InstantiationException {
	this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
 
Example #5
Source File: TokenizerTUI.java — from api-mining (GNU General Public License v3.0), 4 votes
/**
 * Command-line entry point: tokenizes each selected file and prints its
 * tokens to standard output, one token per line, followed by a blank line.
 *
 * @param args
 *            at least two entries: a file or directory path, then the
 *            tokenizer class name; an optional third entry is passed to
 *            TokenizerUtils.tokenizerForClass as the tokenizer arguments.
 *            With fewer than two entries only a usage line is printed to
 *            standard error.
 */
public static void main(final String[] args) throws InvalidInputException,
		IOException, InstantiationException, IllegalAccessException,
		ClassNotFoundException, IllegalArgumentException,
		SecurityException, InvocationTargetException, NoSuchMethodException {
	if (args.length < 2) {
		System.err
				.println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
		return;
	}

	final ITokenizer tok;
	final String tokenizerClass = args[1];
	if (args.length == 2) {
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
	} else {
		final String tokenizerArguments = args[2];
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
				tokenizerArguments);
	}

	// A directory is expanded through the tokenizer's file filter; any other
	// path is treated as the single file to tokenize.
	final File baseFile = new File(args[0]);
	final Collection<File> allFiles;
	if (baseFile.isDirectory()) {
		allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
				DirectoryFileFilter.DIRECTORY);
	} else {
		allFiles = Lists.newArrayList(baseFile);
	}

	// Looked up once instead of once per token.
	final String lineSeparator = System.getProperty("line.separator");

	for (final File fi : allFiles) {
		// The buffer never leaves this iteration, so the unsynchronized
		// StringBuilder replaces StringBuffer.
		final StringBuilder buf = new StringBuilder();
		for (final FullToken token : tok.getTokenListFromCode(fi)) {
			buf.append(token).append(lineSeparator);
		}

		System.out.println(buf.toString());
		System.out.println();
	}
}
 
Example #6
Source File: DistinctTokenCount.java — from tassal (BSD 3-Clause "New" or "Revised" License), 4 votes
/**
 * Sets the tokenizer field to the tokenizer obtained for the named class.
 *
 * @param tokenizerClass
 *            class name forwarded to TokenizerUtils.tokenizerForClass
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public DistinctTokenCount(final String tokenizerClass)
		throws ClassNotFoundException, IllegalAccessException,
		InstantiationException {
	this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
 
Example #7
Source File: TokenizerTUI.java — from tassal (BSD 3-Clause "New" or "Revised" License), 4 votes
/**
 * Command-line entry point: tokenizes each selected file and prints its
 * tokens to standard output, one token per line, followed by a blank line.
 *
 * @param args
 *            at least two entries: a file or directory path, then the
 *            tokenizer class name; an optional third entry is passed to
 *            TokenizerUtils.tokenizerForClass as the tokenizer arguments.
 *            With fewer than two entries only a usage line is printed to
 *            standard error.
 */
public static void main(final String[] args) throws InvalidInputException,
		IOException, InstantiationException, IllegalAccessException,
		ClassNotFoundException, IllegalArgumentException,
		SecurityException, InvocationTargetException, NoSuchMethodException {
	if (args.length < 2) {
		System.err
				.println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
		return;
	}

	final ITokenizer tok;
	final String tokenizerClass = args[1];
	if (args.length == 2) {
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
	} else {
		final String tokenizerArguments = args[2];
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
				tokenizerArguments);
	}

	// A directory is expanded through the tokenizer's file filter; any other
	// path is treated as the single file to tokenize.
	final File baseFile = new File(args[0]);
	final Collection<File> allFiles;
	if (baseFile.isDirectory()) {
		allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
				DirectoryFileFilter.DIRECTORY);
	} else {
		allFiles = Lists.newArrayList(baseFile);
	}

	// Looked up once instead of once per token.
	final String lineSeparator = System.getProperty("line.separator");

	for (final File fi : allFiles) {
		// The buffer never leaves this iteration, so the unsynchronized
		// StringBuilder replaces StringBuffer.
		final StringBuilder buf = new StringBuilder();
		for (final FullToken token : tok.getTokenListFromCode(fi)) {
			buf.append(token).append(lineSeparator);
		}

		System.out.println(buf.toString());
		System.out.println();
	}
}
 
Example #8
Source File: DistinctTokenCount.java — from codemining-core (BSD 3-Clause "New" or "Revised" License), 4 votes
/**
 * Assigns the tokenizer field using the supplied tokenizer class name.
 *
 * @param tokenizerClass
 *            class name given to TokenizerUtils.tokenizerForClass
 * @throws InstantiationException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from tokenizer lookup)
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from tokenizer lookup)
 */
public DistinctTokenCount(final String tokenizerClass)
		throws ClassNotFoundException, IllegalAccessException,
		InstantiationException {
	this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
 
Example #9
Source File: TokenizerTUI.java — from codemining-core (BSD 3-Clause "New" or "Revised" License), 4 votes
/**
 * Command-line entry point: tokenizes each selected file and prints its
 * tokens to standard output, one token per line, followed by a blank line.
 *
 * @param args
 *            at least two entries: a file or directory path, then the
 *            tokenizer class name; an optional third entry is passed to
 *            TokenizerUtils.tokenizerForClass as the tokenizer arguments.
 *            With fewer than two entries only a usage line is printed to
 *            standard error.
 */
public static void main(final String[] args) throws InvalidInputException,
		IOException, InstantiationException, IllegalAccessException,
		ClassNotFoundException, IllegalArgumentException,
		SecurityException, InvocationTargetException, NoSuchMethodException {
	if (args.length < 2) {
		System.err
				.println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
		return;
	}

	final ITokenizer tok;
	final String tokenizerClass = args[1];
	if (args.length == 2) {
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
	} else {
		final String tokenizerArguments = args[2];
		tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
				tokenizerArguments);
	}

	// A directory is expanded through the tokenizer's file filter; any other
	// path is treated as the single file to tokenize.
	final File baseFile = new File(args[0]);
	final Collection<File> allFiles;
	if (baseFile.isDirectory()) {
		allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
				DirectoryFileFilter.DIRECTORY);
	} else {
		allFiles = Lists.newArrayList(baseFile);
	}

	// Looked up once instead of once per token.
	final String lineSeparator = System.getProperty("line.separator");

	for (final File fi : allFiles) {
		// The buffer never leaves this iteration, so the unsynchronized
		// StringBuilder replaces StringBuffer.
		final StringBuilder buf = new StringBuilder();
		for (final FullToken token : tok.getTokenListFromCode(fi)) {
			buf.append(token).append(lineSeparator);
		}

		System.out.println(buf.toString());
		System.out.println();
	}
}