java source code of SelectionSuggestionEval

/**
 * 
 */
package renaming.evaluation;

import static com.google.common.base.Preconditions.checkArgument;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Logger;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang.math.RandomUtils;
import org.eclipse.jdt.core.dom.ASTNode;
import org.eclipse.jdt.core.dom.MethodDeclaration;

import renaming.renamers.AbstractIdentifierRenamings;
import renaming.renamers.BaseIdentifierRenamings;
import renaming.segmentranking.SnippetScorer;
import renaming.segmentranking.SnippetScorer.SnippetSuggestions;
import codemining.java.codedata.MethodRetriever;
import codemining.java.codeutils.JavaASTExtractor;
import codemining.java.codeutils.scopes.ScopedIdentifierRenaming;
import codemining.java.codeutils.scopes.ScopesTUI;
import codemining.java.tokenizers.JavaTokenizer;
import codemining.languagetools.IScopeExtractor;
import codemining.languagetools.ITokenizer;
import codemining.languagetools.ParseType;
import codemining.languagetools.Scope;
import codemining.util.parallel.ParallelThreadPool;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

/**
 * Compare snippets based on their score. Have we found the purturbed one?
 * 
 * @author Miltos Allamanis <[email protected]>
 * 
 */
public class SelectionSuggestionEval {

	public static class EvaluationResults {
		// The various thresholds to use for the Charles measure when rejecting
		public static final double[] THRESHOLDS = { .1, .5, 1, 1.5, 2, 2.5, 3,
				5, 7, 9, 11, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27,
				28, 29, 30, 35, 40 };

		// The various percentage of perturbation to consider
		public static final double[] PERCENTAGE_PERTURBATION = { 0, .1, .2, .3,
				.4, .5, .6, .7, .8, .9, 1 };

		private final long[][] nReject = new long[PERCENTAGE_PERTURBATION.length][THRESHOLDS.length];

		private long nSnippetsConsidered = 0;

		public void printResults() {
			System.out.println("t=" + Arrays.toString(THRESHOLDS));
			System.out
					.println("pp=" + Arrays.toString(PERCENTAGE_PERTURBATION));
			System.out.print("rejectVals=[");
			for (int i = 0; i < nReject.length; i++) {
				System.out.print(Arrays.toString(nReject[i]) + ";");
			}
			System.out.println("]");
			System.out.println("nConsidered=" + nSnippetsConsidered);
		}

		public synchronized void pushResult(
				final double[] scoresAtPerturbationLevels) {
			checkArgument(scoresAtPerturbationLevels.length == PERCENTAGE_PERTURBATION.length);
			nSnippetsConsidered++;

			for (int i = 0; i < PERCENTAGE_PERTURBATION.length; i++) {
				for (int j = 0; j < THRESHOLDS.length; j++) {
					if (scoresAtPerturbationLevels[i] > THRESHOLDS[j]) {
						nReject[i][j]++;
					}
				}
			}

		}
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		if (args.length != 3) {
			System.err
					.println("Usage <projectDir> variable|method|type|all <nExperiments>");
			return;
		}

		final SelectionSuggestionEval sse = new SelectionSuggestionEval(
				new File(args[0]), new JavaTokenizer(),
				ScopesTUI.getScopeExtractorByName(args[1]));

		final int nExperiments = Integer.parseInt(args[2]);
		final ParallelThreadPool pt = new ParallelThreadPool();
		for (int i = 0; i < nExperiments; i++) {
			pt.pushTask(new Runnable() {

				@Override
				public void run() {
					try {
						sse.runSingleExperiment();
					} catch (IOException e) {
						LOGGER.warning("Failed to run single experiment "
								+ ExceptionUtils.getFullStackTrace(e));
					}
				}

			});

			if (RandomUtils.nextDouble() < .1) {
				pt.pushTask(new Runnable() {
					@Override
					public void run() {
						sse.results.printResults();
					}
				});
			}
		}

		pt.waitForTermination();
		sse.results.printResults();
	}

	final Collection<File> allFiles;

	final ITokenizer tokenizer;

	final IScopeExtractor scopeExtractor;

	final EvaluationResults results = new EvaluationResults();

	private static final Logger LOGGER = Logger
			.getLogger(SelectionSuggestionEval.class.getName());

	// Number of snippets to evaluate per time
	public static final int N_SNIPPETS = 500;

	/**
	 * 
	 */
	public SelectionSuggestionEval(final File directory,
			final ITokenizer codeTokenizer, final IScopeExtractor extractor) {
		tokenizer = codeTokenizer;
		allFiles = FileUtils.listFiles(directory, tokenizer.getFileFilter(),
				DirectoryFileFilter.DIRECTORY);
		scopeExtractor = extractor;
	}

	/**
	 * @param renamer
	 * @return
	 */
	public List<File> buildRenamerAndGetTargetMethods(
			final BaseIdentifierRenamings renamer) {
		final List<File> testFiles = Lists.newArrayList();

		final List<File> allFilesList = Lists.newArrayList(allFiles);
		Collections.shuffle(allFilesList);

		int nGatheredMethods = 0;
		for (final File f : allFilesList) {
			try {
				final Map<String, MethodDeclaration> fileMethods = MethodRetriever
						.getMethodNodes(f);
				if (fileMethods.size() == 0) {
					continue;
				}
				nGatheredMethods += fileMethods.size();
				testFiles.add(f);
			} catch (Throwable e) {
				LOGGER.warning("Error in file " + f.getAbsolutePath() + "\n"
						+ ExceptionUtils.getFullStackTrace(e));
			}
			if (nGatheredMethods >= N_SNIPPETS) {
				break;
			}
		}
		checkArgument(nGatheredMethods >= N_SNIPPETS);

		final Set<File> trainingFiles = Sets.newTreeSet(allFiles);
		checkArgument(trainingFiles.removeAll(testFiles));

		renamer.buildRenamingModel(trainingFiles);
		return testFiles;
	}

	private void evaluatePerformanceOn(final MethodDeclaration currentMethod,
			final AbstractIdentifierRenamings renamer,
			final List<String> randomVars) throws IOException {
		final JavaASTExtractor ex = new JavaASTExtractor(false);
		final ScopedIdentifierRenaming identifierRenamer = new ScopedIdentifierRenaming(
				scopeExtractor, ParseType.METHOD);
		final double[] scoreAtPerturbationLevel = new double[EvaluationResults.PERCENTAGE_PERTURBATION.length];
		final Multimap<Scope, String> scopes = scopeExtractor
				.getFromNode(currentMethod);

		if (scopes.entries().size() < EvaluationResults.PERCENTAGE_PERTURBATION.length - 1) {
			return;
		}

		// Deduplicate keys. We will use the same!
		final List<String> identifiersToBeRenamed = Lists.newArrayList(Sets
				.newTreeSet(scopes.values()));
		Collections.shuffle(identifiersToBeRenamed);

		for (int i = 0; i < EvaluationResults.PERCENTAGE_PERTURBATION.length; i++) {
			// Perturb this extra percent of identifiers
			final Map<String, String> renamingPlan = generateRenamingPlan(
					identifiersToBeRenamed,
					EvaluationResults.PERCENTAGE_PERTURBATION[i], renamer,
					randomVars);
			// rename using the renamer naively. Note that we don't care about
			// scopes
			// and naming collisions here, some will happen. Life is hard.
			final String renamedMethod = identifierRenamer.getRenamedCode(
					currentMethod.toString(), currentMethod.toString(),
					renamingPlan);
			// and create an AST Node
			final ASTNode renamedMethodNode = ex.getASTNode(renamedMethod,
					ParseType.METHOD);

			// get score
			final SnippetSuggestions ss = SnippetScorer.scoreSnippet(
					renamedMethodNode, renamer, scopeExtractor, false, false);

			scoreAtPerturbationLevel[i] = ss.score / ss.suggestions.size();
		}
		results.pushResult(scoreAtPerturbationLevel);
	}

	/**
	 * Returns a plan of how each variable name will be renamed.
	 * 
	 * @param identifiersToBeRenamed
	 * @param percent
	 * @return
	 */
	private Map<String, String> generateRenamingPlan(
			final List<String> identifiersToBeRenamed, final double percent,
			final AbstractIdentifierRenamings renamer,
			final List<String> randomVars) {
		checkArgument(percent <= 1);
		checkArgument(percent >= 0);
		final int upTo = (int) (percent * identifiersToBeRenamed.size());
		final Map<String, String> renamingPlan = Maps.newTreeMap();

		for (int i = 0; i < upTo; i++) {
			final String rName = getRandomName(randomVars, renamer.getLM()
					.getTokenizer());
			renamingPlan.put(identifiersToBeRenamed.get(i), rName);
		}
		return renamingPlan;
	}

	private String getRandomName(final List<String> randomVars,
			final ITokenizer tokenizer) {
		String name = randomVars.get(RandomUtils.nextInt(randomVars.size()));
		while (!tokenizer.getTokenFromString(name).tokenType.equals(tokenizer
				.getIdentifierType())) {
			name = randomVars.get(RandomUtils.nextInt(randomVars.size()));
		}
		return name;
	}

	public void runSingleExperiment() throws IOException {
		final BaseIdentifierRenamings renamer = new BaseIdentifierRenamings(
				tokenizer);
		final List<File> selectedFiles = buildRenamerAndGetTargetMethods(renamer);
		final List<String> allToks = Lists.newArrayList(renamer.getLM()
				.getTrie().getVocabulary());
		Collections.shuffle(allToks);
		for (final File f : selectedFiles) {
			try {
				for (final Entry<String, MethodDeclaration> method : MethodRetriever
						.getMethodNodes(f).entrySet()) {
					evaluatePerformanceOn(method.getValue(), renamer, allToks);
				}
			} catch (Throwable e) {
				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
			}

		}

	}
}