codemining.java.tokenizers.JavaTokenizer Java Examples

The following examples show how to use codemining.java.tokenizers.JavaTokenizer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PerturbationEvaluator.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Command-line entry point: builds a {@code PerturbationEvaluator} from the
 * arguments, performs the evaluation and prints the resulting statistics.
 *
 * @param args
 *            {@code <directory> <renamerClass> variable|method}
 */
public static void main(String[] args) {
	final boolean enoughArguments = args.length >= 3;
	if (!enoughArguments) {
		System.err
				.println("Usage <directory> <renamerClass> variable|method");
		return;
	}

	// The third argument names the scope extractor to use.
	final IScopeExtractor scopeExtractor = ScopesTUI
			.getScopeExtractorByName(args[2]);
	final File directory = new File(args[0]);
	final JavaTokenizer tokenizer = new JavaTokenizer();

	final PerturbationEvaluator evaluator = new PerturbationEvaluator(
			directory, tokenizer, scopeExtractor, args[1]);
	evaluator.performEvaluation();
	evaluator.er.printStats();
}
 
Example #2
Source File: JavaBindingsToJson.java    From tassal with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Extract the bindings from the input folder to the output file, using the
 * bindingExtractor.
 *
 * Files under {@code inputFolder} accepted by
 * {@code JavaTokenizer.javaCodeFileFilter} are resolved in parallel. Files
 * for which {@code getResolvedCode} yields {@code null} and results without
 * any bound variables are left out. The remaining results are serialized to
 * {@code outputFile} as JSON.
 *
 * @param inputFolder
 *            the folder whose code files are processed
 * @param outputFile
 *            the file the JSON is written to
 * @param bindingExtractor
 *            the extractor handed to {@code getResolvedCode} for each file
 * @throws IOException
 *             if the output file cannot be opened, written or closed
 * @throws JsonIOException
 *             declared for the JSON serialization step
 */
public static void extractBindings(final File inputFolder,
		final File outputFile,
		final AbstractJavaNameBindingsExtractor bindingExtractor)
		throws IOException, JsonIOException {
	final Collection<File> allFiles = FileUtils
			.listFiles(inputFolder, JavaTokenizer.javaCodeFileFilter,
					DirectoryFileFilter.DIRECTORY);
	final List<SerializableResolvedSourceCode> resolvedCode = allFiles
			.parallelStream()
			.map(f -> getResolvedCode(f, bindingExtractor))
			.filter(r -> r != null)
			.map(r -> SerializableResolvedSourceCode
					.fromResolvedSourceCode(r))
			.filter(s -> !s.boundVariables.isEmpty())
			.collect(Collectors.toList());

	// try-with-resources closes the writer on every path and keeps a
	// serialization failure as the primary exception if close() also fails.
	try (final FileWriter writer = new FileWriter(outputFile)) {
		final Gson gson = new Gson();
		gson.toJson(resolvedCode, writer);
	}
}
 
Example #3
Source File: JavaTypeHierarchyExtractor.java    From api-mining with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point: adds the code files found in the given folder
 * to a {@code JavaTypeHierarchyExtractor} and prints the extractor.
 *
 * @param args
 *            exactly one element, the code folder
 */
public static void main(final String[] args) {
	final boolean singleArgument = args.length == 1;
	if (!singleArgument) {
		System.err.println("Usage <codeFolder>");
		System.exit(-1);
	}

	final File codeFolder = new File(args[0]);
	final Collection<File> javaFiles = FileUtils.listFiles(codeFolder,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY);

	final JavaTypeHierarchyExtractor extractor = new JavaTypeHierarchyExtractor();
	extractor.addFilesToCorpus(javaFiles);
	System.out.println(extractor);
}
 
Example #4
Source File: JavaTypeHierarchyExtractor.java    From tassal with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Command-line entry point: adds the code files found in the given folder
 * to a {@code JavaTypeHierarchyExtractor} and prints the extractor.
 *
 * @param args
 *            exactly one element, the code folder
 */
public static void main(final String[] args) {
	final boolean singleArgument = args.length == 1;
	if (!singleArgument) {
		System.err.println("Usage <codeFolder>");
		System.exit(-1);
	}

	final File codeFolder = new File(args[0]);
	final Collection<File> javaFiles = FileUtils.listFiles(codeFolder,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY);

	final JavaTypeHierarchyExtractor extractor = new JavaTypeHierarchyExtractor();
	extractor.addFilesToCorpus(javaFiles);
	System.out.println(extractor);
}
 
Example #5
Source File: JavaASTExtractor.java    From tassal with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Hacky way to compare snippets: tokenizes the snippet and re-joins the
 * tokens so that two snippets can be compared as strings.
 *
 * @param snippet
 *            the code to normalize
 * @return the tokens of the snippet, each followed by a single space, with
 *         the {@code ITokenizer.SENTENCE_START} and
 *         {@code ITokenizer.SENTENCE_END} markers left out
 */
private String normalizeCode(final char[] snippet) {
	final List<String> tokens = (new JavaTokenizer())
			.tokenListFromCode(snippet);

	// The accumulator never leaves this method, so the unsynchronized
	// StringBuilder is sufficient.
	final StringBuilder normalized = new StringBuilder();
	for (final String token : tokens) {
		if (token.equals(ITokenizer.SENTENCE_START)
				|| token.equals(ITokenizer.SENTENCE_END)) {
			continue;
		}
		normalized.append(token).append(" ");
	}
	return normalized.toString();
}
 
Example #6
Source File: CommonNameRenamingEvaluator.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Command-line entry point: evaluates common-name renaming on a folder and
 * prints the results. Every argument after the first is passed on as a
 * sorted set of names.
 *
 * @param args
 *            {@code <folder> <junkNames> ...}
 */
public static void main(String[] args) {
	final boolean enoughArguments = args.length >= 5;
	if (!enoughArguments) {
		System.err.println("Usage <folder> <junkNames> ...");
		return;
	}

	final File folder = new File(args[0]);
	final ITokenizer javaTokenizer = new JavaTokenizer();
	final Class<? extends AbstractNGramLM> languageModelClass = StupidBackoff.class;

	final CommonNameRenamingEvaluator renamingEvaluator = new CommonNameRenamingEvaluator(
			folder, javaTokenizer, languageModelClass,
			Sets.newTreeSet(Arrays.asList(args).subList(1, args.length)));
	renamingEvaluator.evaluate();
	renamingEvaluator.printResults();
}
 
Example #7
Source File: ChangingIdentifiersRepositoryWalker.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Scans every code file of the repository directory at the given commit
 * and records the identifier information found, keyed by the file's path
 * relative to the repository directory. Files that fail with an
 * {@code IOException} are logged and skipped.
 *
 * @param repositoryDir
 *            the directory of the checked-out repository
 * @param sha
 *            the commit identifier passed to the scanner
 */
private void doFirstScan(final File repositoryDir, final String sha) {
	for (final File codeFile : FileUtils.listFiles(repositoryDir,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY)) {
		// Strip the repository prefix and the separator that follows it.
		final String absolutePath = codeFile.getAbsolutePath();
		final String fileInRepo = absolutePath.substring(repositoryDir
				.getAbsolutePath().length() + 1);
		try {
			final Set<IdentifierInformation> scannedInfos = infoScanner
					.scanFile(codeFile, sha);
			for (final IdentifierInformation info : scannedInfos) {
				final IdentifierInformationThroughTime history = new IdentifierInformationThroughTime();
				history.addInformation(info);
				currentStateOfIdentifiers.put(fileInRepo, history);
			}
		} catch (final IOException e) {
			LOGGER.severe("Could not find file " + codeFile + "\n"
					+ ExceptionUtils.getFullStackTrace(e));
		}
	}
}
 
Example #8
Source File: JavaTypeHierarchyExtractor.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Command-line entry point: adds the code files found in the given folder
 * to a {@code JavaTypeHierarchyExtractor} and prints the extractor.
 *
 * @param args
 *            exactly one element, the code folder
 */
public static void main(final String[] args) {
	final boolean singleArgument = args.length == 1;
	if (!singleArgument) {
		System.err.println("Usage <codeFolder>");
		System.exit(-1);
	}

	final File codeFolder = new File(args[0]);
	final Collection<File> javaFiles = FileUtils.listFiles(codeFolder,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY);

	final JavaTypeHierarchyExtractor extractor = new JavaTypeHierarchyExtractor();
	extractor.addFilesToCorpus(javaFiles);
	System.out.println(extractor);
}
 
Example #9
Source File: JavaASTExtractor.java    From api-mining with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Hacky way to compare snippets: tokenizes the snippet and re-joins the
 * tokens so that two snippets can be compared as strings.
 *
 * @param snippet
 *            the code to normalize
 * @return the tokens of the snippet, each followed by a single space, with
 *         the {@code ITokenizer.SENTENCE_START} and
 *         {@code ITokenizer.SENTENCE_END} markers left out
 */
private String normalizeCode(final char[] snippet) {
	final List<String> tokens = (new JavaTokenizer())
			.tokenListFromCode(snippet);

	// The accumulator never leaves this method, so the unsynchronized
	// StringBuilder is sufficient.
	final StringBuilder normalized = new StringBuilder();
	for (final String token : tokens) {
		if (token.equals(ITokenizer.SENTENCE_START)
				|| token.equals(ITokenizer.SENTENCE_END)) {
			continue;
		}
		normalized.append(token).append(" ");
	}
	return normalized.toString();
}
 
Example #10
Source File: JavaBindingsToJson.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Extract the bindings from the input folder to the output file, using the
 * bindingExtractor.
 *
 * Files under {@code inputFolder} accepted by
 * {@code JavaTokenizer.javaCodeFileFilter} are resolved in parallel. Files
 * for which {@code getResolvedCode} yields {@code null} and results without
 * any bound variables are left out. The remaining results are serialized to
 * {@code outputFile} as JSON.
 *
 * @param inputFolder
 *            the folder whose code files are processed
 * @param outputFile
 *            the file the JSON is written to
 * @param bindingExtractor
 *            the extractor handed to {@code getResolvedCode} for each file
 * @throws IOException
 *             if the output file cannot be opened, written or closed
 * @throws JsonIOException
 *             declared for the JSON serialization step
 */
public static void extractBindings(final File inputFolder,
		final File outputFile,
		final AbstractJavaNameBindingsExtractor bindingExtractor)
		throws IOException, JsonIOException {
	final Collection<File> allFiles = FileUtils
			.listFiles(inputFolder, JavaTokenizer.javaCodeFileFilter,
					DirectoryFileFilter.DIRECTORY);
	final List<SerializableResolvedSourceCode> resolvedCode = allFiles
			.parallelStream()
			.map(f -> getResolvedCode(f, bindingExtractor))
			.filter(r -> r != null)
			.map(r -> SerializableResolvedSourceCode
					.fromResolvedSourceCode(r))
			.filter(s -> !s.boundVariables.isEmpty())
			.collect(Collectors.toList());

	// try-with-resources closes the writer on every path and keeps a
	// serialization failure as the primary exception if close() also fails.
	try (final FileWriter writer = new FileWriter(outputFile)) {
		final Gson gson = new Gson();
		gson.toJson(resolvedCode, writer);
	}
}
 
Example #11
Source File: SegmentRenamingSuggestion.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Ranks renaming suggestions for the variables of {@code currentFile},
 * using a renaming model built from the files of {@code directory} with
 * {@code currentFile} itself removed from the training set.
 *
 * @param currentFile
 *            the file to rank suggestions for
 * @param directory
 *            the directory providing the training files
 * @param useUNK
 *            forwarded unchanged to {@code SegmentRenamingSuggestion}
 * @return the ranked suggestions
 * @throws IOException
 *             if any of the invoked steps fails with an I/O error
 */
public static SortedSet<Suggestion> getVariableSuggestions(
		final File currentFile, final File directory, final boolean useUNK)
		throws IOException {
	final ITokenizer javaTokenizer = new JavaTokenizer();
	final AbstractIdentifierRenamings renamingModel = new BaseIdentifierRenamings(
			javaTokenizer);

	// Train on everything in the directory except the file under test.
	final Collection<java.io.File> filesToTrainOn = FileUtils.listFiles(
			directory, javaTokenizer.getFileFilter(),
			DirectoryFileFilter.DIRECTORY);
	filesToTrainOn.remove(currentFile);
	renamingModel.buildRenamingModel(filesToTrainOn);

	final IScopeExtractor variableScopes = new VariableScopeExtractor.VariableScopeSnippetExtractor();
	final SegmentRenamingSuggestion suggester = new SegmentRenamingSuggestion(
			renamingModel, variableScopes, useUNK);
	return suggester.rankSuggestions(currentFile);
}
 
Example #12
Source File: VariableUsageStatistics.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Command-line entry point: extracts usage statistics for a project folder
 * with the named scope extractor and prints them.
 *
 * @param args
 *            {@code <projectFolder> variable|method}
 * @throws IOException
 *             if any of the invoked steps fails with an I/O error
 */
public static void main(String[] args) throws IOException {
	final boolean enoughArguments = args.length >= 2;
	if (!enoughArguments) {
		System.err.println("Usage <projectFolder> variable|method");
		return;
	}

	// The second argument names the scope extractor to use.
	final IScopeExtractor scopeExtractor = ScopesTUI
			.getScopeExtractorByName(args[1]);
	final File projectFolder = new File(args[0]);
	final JavaTokenizer tokenizer = new JavaTokenizer();

	final VariableUsageStatistics statistics = new VariableUsageStatistics(
			projectFolder, tokenizer, scopeExtractor);
	statistics.extractStats();
	statistics.printStats();
}
 
Example #13
Source File: JavaASTExtractor.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Hacky way to compare snippets: tokenizes the snippet and re-joins the
 * tokens so that two snippets can be compared as strings.
 *
 * @param snippet
 *            the code to normalize
 * @return the tokens of the snippet, each followed by a single space, with
 *         the {@code ITokenizer.SENTENCE_START} and
 *         {@code ITokenizer.SENTENCE_END} markers left out
 */
private String normalizeCode(final char[] snippet) {
	final List<String> tokens = (new JavaTokenizer())
			.tokenListFromCode(snippet);

	// The accumulator never leaves this method, so the unsynchronized
	// StringBuilder is sufficient.
	final StringBuilder normalized = new StringBuilder();
	for (final String token : tokens) {
		if (token.equals(ITokenizer.SENTENCE_START)
				|| token.equals(ITokenizer.SENTENCE_END)) {
			continue;
		}
		normalized.append(token).append(" ");
	}
	return normalized.toString();
}
 
Example #14
Source File: JavaBindingsToJson.java    From api-mining with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extract the bindings from the input folder to the output file, using the
 * bindingExtractor.
 *
 * Files under {@code inputFolder} accepted by
 * {@code JavaTokenizer.javaCodeFileFilter} are resolved in parallel. Files
 * for which {@code getResolvedCode} yields {@code null} and results without
 * any bound variables are left out. The remaining results are serialized to
 * {@code outputFile} as JSON.
 *
 * @param inputFolder
 *            the folder whose code files are processed
 * @param outputFile
 *            the file the JSON is written to
 * @param bindingExtractor
 *            the extractor handed to {@code getResolvedCode} for each file
 * @throws IOException
 *             if the output file cannot be opened, written or closed
 * @throws JsonIOException
 *             declared for the JSON serialization step
 */
public static void extractBindings(final File inputFolder,
		final File outputFile,
		final AbstractJavaNameBindingsExtractor bindingExtractor)
		throws IOException, JsonIOException {
	final Collection<File> allFiles = FileUtils
			.listFiles(inputFolder, JavaTokenizer.javaCodeFileFilter,
					DirectoryFileFilter.DIRECTORY);
	final List<SerializableResolvedSourceCode> resolvedCode = allFiles
			.parallelStream()
			.map(f -> getResolvedCode(f, bindingExtractor))
			.filter(r -> r != null)
			.map(r -> SerializableResolvedSourceCode
					.fromResolvedSourceCode(r))
			.filter(s -> !s.boundVariables.isEmpty())
			.collect(Collectors.toList());

	// try-with-resources closes the writer on every path and keeps a
	// serialization failure as the primary exception if close() also fails.
	try (final FileWriter writer = new FileWriter(outputFile)) {
		final Gson gson = new Gson();
		gson.toJson(resolvedCode, writer);
	}
}
 
Example #15
Source File: SegmentRenamingSuggestion.java    From naturalize with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Command-line entry point: reflectively instantiates the named renamer
 * class through its {@code ITokenizer} constructor, trains it on the
 * training directory and prints the ranked suggestions for the test file.
 *
 * @param args
 *            {@code <TestFile> <TrainDirectory> <renamerClass> variable|method}
 */
public static void main(final String[] args)
		throws IllegalArgumentException, SecurityException,
		InstantiationException, IllegalAccessException,
		InvocationTargetException, NoSuchMethodException,
		ClassNotFoundException, IOException {
	final boolean enoughArguments = args.length >= 4;
	if (!enoughArguments) {
		System.err
				.println("Usage <TestFile> <TrainDirectory> <renamerClass> variable|method");
		return;
	}

	final ITokenizer javaTokenizer = new JavaTokenizer();

	// The renamer class must declare a constructor taking an ITokenizer.
	final Class<?> renamerType = Class.forName(args[2]);
	final AbstractIdentifierRenamings renamingModel = (AbstractIdentifierRenamings) renamerType
			.getDeclaredConstructor(ITokenizer.class).newInstance(
					javaTokenizer);

	final File trainDirectory = new File(args[1]);
	renamingModel.buildRenamingModel(FileUtils.listFiles(trainDirectory,
			javaTokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY));

	final IScopeExtractor scopes = ScopesTUI
			.getScopeExtractorByName(args[3]);
	final SegmentRenamingSuggestion suggester = new SegmentRenamingSuggestion(
			renamingModel, scopes, true);

	final File testFile = new File(args[0]);
	System.out.println(suggester.rankSuggestions(testFile));
}
 
Example #16
Source File: ChangingIdentifiersRepositoryWalker.java    From naturalize with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a walker over the given repository and sets up an
 * {@code EditListRetriever} that uses
 * {@code JavaTokenizer.javaCodeFileFilter}.
 *
 * @param repositoryDirectory
 *            passed on to the superclass constructor
 * @param walkingStrategy
 *            passed on to the superclass constructor
 * @throws IOException
 *             if the superclass constructor or the retriever setup fails
 *             with an I/O error
 */
public ChangingIdentifiersRepositoryWalker(
		final String repositoryDirectory,
		final ICommitWalkingStrategy walkingStrategy) throws IOException {
	super(repositoryDirectory, walkingStrategy);

	// Edit lists are retrieved with the Java code file filter.
	editListRetriever = new EditListRetriever(
			repository, JavaTokenizer.javaCodeFileFilter);
}
 
Example #17
Source File: TokenizeJavaCodeTest.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks that {@code getTokenFromString} tags an identifier with the
 * tokenizer's identifier type and a left brace with
 * {@code ITerminalSymbols.TokenNameLBRACE}.
 */
@Test
public void testTokenTypes() {
	final ITokenizer tokenizer = new JavaTokenizer();

	// assertEquals takes (expected, actual); keeping that order makes a
	// failure message report the right value as "expected".
	assertEquals(
			new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
			tokenizer.getTokenFromString("hello"));
	assertEquals(
			new ITokenizer.FullToken("{", Integer
					.toString(ITerminalSymbols.TokenNameLBRACE)),
			tokenizer.getTokenFromString("{"));

}
 
Example #18
Source File: JavaMethodDeclarationBindingExtractor.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates an extractor backed by a {@code JavaTokenizer}. Project type
 * information is built from {@code inputFolder} only when overrides are
 * not included; otherwise it is left {@code null}.
 *
 * @param includeOverrides
 *            stored on the extractor; also decides whether project type
 *            information is built
 * @param inputFolder
 *            the folder used to build the project type information
 */
public JavaMethodDeclarationBindingExtractor(
		final boolean includeOverrides, final File inputFolder) {
	super(new JavaTokenizer());
	this.includeOverrides = includeOverrides;
	pti = includeOverrides ? null
			: buildProjectTypeInformation(inputFolder);
}
 
Example #19
Source File: UsagePointExtractor.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Command-line entry point: prints, for every code file in the directory,
 * the usage points of the given fully qualified class. Files without any
 * usage are not listed. A file that fails to process is reported on
 * standard error and the scan continues with the next one.
 *
 * @param args
 *            {@code <fullyQualifiedClass> <directory>}
 */
public static void main(final String[] args) {
	if (args.length != 2) {
		System.err.println("Usage <fullyQualifiedClass> <directory>");
		System.exit(-1);
	}

	final File directory = new File(args[1]);
	final String qualifiedClass = args[0];

	for (final File fi : FileUtils
			.listFiles(directory, JavaTokenizer.javaCodeFileFilter,
					DirectoryFileFilter.DIRECTORY)) {
		try {
			final List<ASTNode> usages = usagePoints(qualifiedClass, fi);
			if (!usages.isEmpty()) {
				System.out.println(fi.getAbsolutePath());
				for (final ASTNode node : usages) {
					System.out
							.println("----------------------------------------------");
					System.out.println(node);
				}
			}
		} catch (final Exception e) {
			// Best-effort scan: keep going, but report why the file failed
			// instead of dropping the exception.
			System.err.println("Error processing " + fi.getName() + ": "
					+ e);
		}

	}

}
 
Example #20
Source File: JavaMethodDeclarationBindingExtractor.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an extractor backed by a {@code JavaTokenizer}. Project type
 * information is built from {@code inputFolder} only when overrides are
 * not included; otherwise it is left {@code null}.
 *
 * @param includeOverrides
 *            stored on the extractor; also decides whether project type
 *            information is built
 * @param inputFolder
 *            the folder used to build the project type information
 */
public JavaMethodDeclarationBindingExtractor(
		final boolean includeOverrides, final File inputFolder) {
	super(new JavaTokenizer());
	this.includeOverrides = includeOverrides;
	pti = includeOverrides ? null
			: buildProjectTypeInformation(inputFolder);
}
 
Example #21
Source File: MethodsInClass.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Command-line entry point: scans the code files of a project directory
 * with a {@code MethodsInClass} instance and prints that instance.
 *
 * @param args
 *            exactly one element, the project directory
 */
public static void main(final String[] args) {
	final boolean singleArgument = args.length == 1;
	if (!singleArgument) {
		System.err.println("Usage <projectDir>");
		System.exit(-1);
	}

	final MethodsInClass methods = new MethodsInClass();
	final File projectDir = new File(args[0]);
	methods.scan(FileUtils.listFiles(projectDir,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY));
	System.out.println(methods);
}
 
Example #22
Source File: ProjectTypeInformation.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Collects the project's type information: scans the code files of the
 * project directory for method information and stores the type hierarchy
 * extracted from the same files.
 */
public void collect() {
	final Collection<File> projectFiles = FileUtils.listFiles(
			projectDirectory, JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY);

	methodInformation.scan(projectFiles);

	// The hierarchy is built from the same file set as the method scan.
	final JavaTypeHierarchyExtractor typeHierarchyExtractor = new JavaTypeHierarchyExtractor();
	typeHierarchyExtractor.addFilesToCorpus(projectFiles);
	hierarchy = typeHierarchyExtractor.getHierarchy();
}
 
Example #23
Source File: TokenizeJavaCodeTest.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code getTokenFromString} tags an identifier with the
 * tokenizer's identifier type and a left brace with
 * {@code ITerminalSymbols.TokenNameLBRACE}.
 */
@Test
public void testTokenTypes() {
	final ITokenizer tokenizer = new JavaTokenizer();

	// assertEquals takes (expected, actual); keeping that order makes a
	// failure message report the right value as "expected".
	assertEquals(
			new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
			tokenizer.getTokenFromString("hello"));
	assertEquals(
			new ITokenizer.FullToken("{", Integer
					.toString(ITerminalSymbols.TokenNameLBRACE)),
			tokenizer.getTokenFromString("{"));

}
 
Example #24
Source File: JavaAstExtractorTest.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tells whether the snippet and the string form of the AST node yield the
 * same token list when run through a {@code JavaTokenizer}.
 *
 * @param snippetCode
 *            the source snippet
 * @param node
 *            the AST node whose {@code toString()} output is tokenized
 * @return {@code true} if both token lists are equal
 */
private boolean snippetMatchesAstTokens(final String snippetCode,
		final ASTNode node) {
	final JavaTokenizer javaTokenizer = new JavaTokenizer();

	final char[] snippetChars = snippetCode.toCharArray();
	final List<String> tokensOfSnippet = javaTokenizer
			.tokenListFromCode(snippetChars);

	final char[] nodeChars = node.toString().toCharArray();
	final List<String> tokensOfNode = javaTokenizer
			.tokenListFromCode(nodeChars);

	return tokensOfNode.equals(tokensOfSnippet);
}
 
Example #25
Source File: UsagePointExtractor.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Command-line entry point: prints, for every code file in the directory,
 * the usage points of the given fully qualified class. Files without any
 * usage are not listed. A file that fails to process is reported on
 * standard error and the scan continues with the next one.
 *
 * @param args
 *            {@code <fullyQualifiedClass> <directory>}
 */
public static void main(final String[] args) {
	if (args.length != 2) {
		System.err.println("Usage <fullyQualifiedClass> <directory>");
		System.exit(-1);
	}

	final File directory = new File(args[1]);
	final String qualifiedClass = args[0];

	for (final File fi : FileUtils
			.listFiles(directory, JavaTokenizer.javaCodeFileFilter,
					DirectoryFileFilter.DIRECTORY)) {
		try {
			final List<ASTNode> usages = usagePoints(qualifiedClass, fi);
			if (!usages.isEmpty()) {
				System.out.println(fi.getAbsolutePath());
				for (final ASTNode node : usages) {
					System.out
							.println("----------------------------------------------");
					System.out.println(node);
				}
			}
		} catch (final Exception e) {
			// Best-effort scan: keep going, but report why the file failed
			// instead of dropping the exception.
			System.err.println("Error processing " + fi.getName() + ": "
					+ e);
		}

	}

}
 
Example #26
Source File: JavaMethodDeclarationBindingExtractor.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an extractor backed by a {@code JavaTokenizer}. Project type
 * information is built from {@code inputFolder} only when overrides are
 * not included; otherwise it is left {@code null}.
 *
 * @param includeOverrides
 *            stored on the extractor; also decides whether project type
 *            information is built
 * @param inputFolder
 *            the folder used to build the project type information
 */
public JavaMethodDeclarationBindingExtractor(
		final boolean includeOverrides, final File inputFolder) {
	super(new JavaTokenizer());
	this.includeOverrides = includeOverrides;
	pti = includeOverrides ? null
			: buildProjectTypeInformation(inputFolder);
}
 
Example #27
Source File: MethodsInClass.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Command-line entry point: scans the code files of a project directory
 * with a {@code MethodsInClass} instance and prints that instance.
 *
 * @param args
 *            exactly one element, the project directory
 */
public static void main(final String[] args) {
	final boolean singleArgument = args.length == 1;
	if (!singleArgument) {
		System.err.println("Usage <projectDir>");
		System.exit(-1);
	}

	final MethodsInClass methods = new MethodsInClass();
	final File projectDir = new File(args[0]);
	methods.scan(FileUtils.listFiles(projectDir,
			JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY));
	System.out.println(methods);
}
 
Example #28
Source File: ProjectTypeInformation.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Collects the project's type information: scans the code files of the
 * project directory for method information and stores the type hierarchy
 * extracted from the same files.
 */
public void collect() {
	final Collection<File> projectFiles = FileUtils.listFiles(
			projectDirectory, JavaTokenizer.javaCodeFileFilter,
			DirectoryFileFilter.DIRECTORY);

	methodInformation.scan(projectFiles);

	// The hierarchy is built from the same file set as the method scan.
	final JavaTypeHierarchyExtractor typeHierarchyExtractor = new JavaTypeHierarchyExtractor();
	typeHierarchyExtractor.addFilesToCorpus(projectFiles);
	hierarchy = typeHierarchyExtractor.getHierarchy();
}
 
Example #29
Source File: TokenizeJavaCodeTest.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code getTokenFromString} tags an identifier with the
 * tokenizer's identifier type and a left brace with
 * {@code ITerminalSymbols.TokenNameLBRACE}.
 */
@Test
public void testTokenTypes() {
	final ITokenizer tokenizer = new JavaTokenizer();

	// assertEquals takes (expected, actual); keeping that order makes a
	// failure message report the right value as "expected".
	assertEquals(
			new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
			tokenizer.getTokenFromString("hello"));
	assertEquals(
			new ITokenizer.FullToken("{", Integer
					.toString(ITerminalSymbols.TokenNameLBRACE)),
			tokenizer.getTokenFromString("{"));

}
 
Example #30
Source File: JavaAstExtractorTest.java    From codemining-core with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tells whether the snippet and the string form of the AST node yield the
 * same token list when run through a {@code JavaTokenizer}.
 *
 * @param snippetCode
 *            the source snippet
 * @param node
 *            the AST node whose {@code toString()} output is tokenized
 * @return {@code true} if both token lists are equal
 */
private boolean snippetMatchesAstTokens(final String snippetCode,
		final ASTNode node) {
	final JavaTokenizer javaTokenizer = new JavaTokenizer();

	final char[] snippetChars = snippetCode.toCharArray();
	final List<String> tokensOfSnippet = javaTokenizer
			.tokenListFromCode(snippetChars);

	final char[] nodeChars = node.toString().toCharArray();
	final List<String> tokensOfNode = javaTokenizer
			.tokenListFromCode(nodeChars);

	return tokensOfNode.equals(tokensOfSnippet);
}