Java Code Examples for edu.stanford.nlp.stats.Counter#addAll()

The following examples show how to use edu.stanford.nlp.stats.Counter#addAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DocumentFrequencyCounter.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Submits one {@code FileIDFBuilder} task per path in {@code args}, merges the
 * counters the tasks return into a single counter, and serializes the merged
 * counter to {@code OUT_FILE}. Progress messages are written to standard error.
 *
 * @param args paths of the input files; one task is submitted per path
 * @throws InterruptedException if the thread is interrupted while waiting for a task
 * @throws ExecutionException if a task terminated with an exception
 * @throws IOException if the merged counter cannot be written
 */
public static void main(String[] args) throws InterruptedException, ExecutionException,
  IOException {
  ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
  Counter<String> overall = new ClassicCounter<String>();

  try {
    List<Future<Counter<String>>> futures = new ArrayList<Future<Counter<String>>>();
    for (String filePath : args) {
      futures.add(pool.submit(new FileIDFBuilder(new File(filePath))));
    }

    int finished = 0;
    for (Future<Counter<String>> future : futures) {
      System.err.printf("%s: Polling future #%d / %d%n",
          dateFormat.format(new Date()), finished + 1, args.length);
      // Blocks until this task is done; futures are consumed in submission order.
      Counter<String> result = future.get();
      finished++;
      System.err.printf("%s: Finished future #%d / %d%n",
          dateFormat.format(new Date()), finished, args.length);

      System.err.printf("\tMerging counter.. ");
      overall.addAll(result);
      System.err.printf("done.%n");
    }
  } finally {
    // Shut the pool down even when a task fails: the default pool threads are
    // non-daemon, so leaving them running would keep the JVM from exiting.
    pool.shutdown();
  }

  System.err.printf("\n%s: Saving to '%s'.. ", dateFormat.format(new Date()),
      OUT_FILE);
  FileOutputStream fileOut = new FileOutputStream(OUT_FILE);
  try {
    ObjectOutputStream oos = new ObjectOutputStream(fileOut);
    oos.writeObject(overall);
    // close() flushes the buffered object data before closing the file.
    oos.close();
  } finally {
    // Releases the file handle if anything above threw; harmless if already closed.
    fileOut.close();
  }
  System.err.printf("done.%n");
}
 
Example 2
Source File: AbstractOnlineOptimizer.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sums the unregularized gradients of every example in the batch that has at
 * least one translation and, when {@code l2Regularization} is set, adds a
 * term proportional to each current weight.
 *
 * @param weights current feature weights
 * @param sources source sentences, indexed in parallel with {@code sourceIds}
 * @param sourceIds ids of the batch examples; its length is the batch size
 * @param translations candidate translations per example
 * @param references reference translations per example
 * @param referenceWeights weights passed through to the per-example gradient
 * @param scoreMetric sentence-level metric passed through to the per-example gradient
 * @return a new counter holding the batch gradient
 */
@Override
public Counter<String> getBatchGradient(Counter<String> weights,
    List<Sequence<IString>> sources, int[] sourceIds,
    List<List<RichTranslation<IString, String>>> translations,
    List<List<Sequence<IString>>> references,
    double[] referenceWeights,
    SentenceLevelMetric<IString, String> scoreMetric) {
  Counter<String> batchGradient = new ClassicCounter<String>();

  for (int i = 0; i < sourceIds.length; i++) {
    // Skip decoder failures: examples with no translations contribute nothing.
    if (translations.get(i).size() > 0) {
      Counter<String> unregularizedGradient = getUnregularizedGradient(weights, sources.get(i), sourceIds[i], translations.get(i), references.get(i), referenceWeights, scoreMetric);
      batchGradient.addAll(unregularizedGradient);
    }
  }

  // Add L2 regularization directly into the derivative
  if (this.l2Regularization) {
    // Copy of the weight keys to iterate over. Only keys present in weights
    // can contribute, since the added term is a multiple of the weight itself.
    final Set<String> features = new HashSet<String>(weights.keySet());
    // Scale the regularizer by the fraction of the tuning set this batch covers.
    final double dataFraction = sourceIds.length /(double) tuneSetSize;
    final double scaledInvSigmaSquared = dataFraction/(2*sigmaSq);
    for (String key : features) {
      double x = weights.getCount(key);
      batchGradient.incrementCount(key, x * scaledInvSigmaSquared);
    }
  }

  return batchGradient;
}
 
Example 3
Source File: PerceptronOptimizer.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Iteratively moves the weights toward the feature vector of the
 * metric-maximizing hypotheses: each round line-searches along the difference
 * between that target vector and the feature vector of the current model-best
 * hypotheses, and stops once the squared change in weights drops below
 * {@code MERT.NO_PROGRESS_SSD}.
 *
 * @param initialWts starting weights
 * @return the weights produced by the last line search
 */
@Override
public Counter<String> optimize(Counter<String> initialWts) {

  // Target: summed features of the hypotheses that maximize the evaluation metric.
  MultiTranslationMetricMax<IString, String> metricSearch =
      new HillClimbingMultiTranslationMetricMax<IString, String>(emetric);
  List<ScoredFeaturizedTranslation<IString, String>> target = metricSearch.maximize(nbest);
  Counter<String> targetFeatures = MERT.summarizedAllFeaturesVector(target);

  Counter<String> current = initialWts;
  double ssd;
  do {
    // Hypotheses preferred by the current weights.
    Scorer<String> scorer = new DenseScorer(current, MERT.featureIndex);
    MultiTranslationMetricMax<IString, String> modelSearch =
        new HillClimbingMultiTranslationMetricMax<IString, String>(
            new ScorerWrapperEvaluationMetric<IString, String>(scorer));
    List<ScoredFeaturizedTranslation<IString, String>> modelBest = modelSearch.maximize(nbest);

    // Search direction = targetFeatures - modelBestFeatures.
    Counter<String> direction = MERT.summarizedAllFeaturesVector(modelBest);
    Counters.multiplyInPlace(direction, -1.0);
    direction.addAll(targetFeatures);

    Counter<String> updated = mert.lineSearch(nbest, current, direction, emetric);

    // Sum of squared differences over the keys of the updated weights.
    ssd = 0;
    for (String feature : updated.keySet()) {
      double delta = current.getCount(feature) - updated.getCount(feature);
      ssd += delta * delta;
    }
    current = updated;
  } while (!(ssd < MERT.NO_PROGRESS_SSD));

  return current;
}
 
Example 4
Source File: BasicPowellOptimizer.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Optimizes the weights by repeated line searches along a set of directions
 * that starts as one unit direction per feature (in sorted feature-name
 * order) and is reset to those axis directions every {@code N} iterations,
 * where {@code N} is the number of features. After each sweep the oldest
 * direction is dropped, the overall displacement of the sweep is appended as
 * a new direction, and one more line search is run along it. Stops when a
 * sweep changes the objective by less than {@code MERT.MIN_OBJECTIVE_DIFF}.
 *
 * @param initialWts starting weights; its keys define the search directions
 * @return the optimized weights, or {@code initialWts} itself when it has no keys
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public Counter<String> optimize(Counter<String> initialWts) {
  Counter<String> wts = initialWts;

  // initialize search directions
  List<Counter<String>> axisDirs = new ArrayList<Counter<String>>(
      initialWts.size());
  List<String> featureNames = new ArrayList<String>(wts.keySet());
  Collections.sort(featureNames);
  for (String featureName : featureNames) {
    Counter<String> dir = new ClassicCounter<String>();
    dir.incrementCount(featureName);
    axisDirs.add(dir);
  }

  // With no features there is nothing to search over, and the loop below
  // would fail on "iter % p.length" with a division by zero.
  if (axisDirs.isEmpty()) {
    return wts;
  }

  // main optimization loop
  Counter[] p = new ClassicCounter[axisDirs.size()];
  double objValue = MERT.evalAtPoint(nbest, wts, emetric); // obj value w/o
  // smoothing
  List<Counter<String>> dirs = null;
  for (int iter = 0;; iter++) {
    if (iter % p.length == 0) {
      // reset after N iterations to avoid linearly dependent search
      // directions
      System.err.printf("%d: Search direction reset\n", iter);
      dirs = new ArrayList<Counter<String>>(axisDirs);
    }
    // search along each direction
    assert (dirs != null);
    p[0] = mert.lineSearch(nbest, wts, dirs.get(0), emetric);
    for (int i = 1; i < p.length; i++) {
      p[i] = mert.lineSearch(nbest, (Counter<String>) p[i - 1], dirs.get(i),
          emetric);
      dirs.set(i - 1, dirs.get(i)); // shift search directions
    }

    // Change in the objective produced by the whole sweep.
    double totalWin = MERT.evalAtPoint(nbest, p[p.length - 1], emetric)
        - objValue;
    System.err.printf("%d: totalWin: %e Objective: %e\n", iter, totalWin,
        objValue);
    if (Math.abs(totalWin) < MERT.MIN_OBJECTIVE_DIFF)
      break;

    // construct combined direction: (end point of the sweep) - (start point)
    Counter<String> combinedDir = new ClassicCounter<String>(wts);
    Counters.multiplyInPlace(combinedDir, -1.0);
    combinedDir.addAll(p[p.length - 1]);

    // The combined direction takes the last slot freed by the shift above.
    dirs.set(p.length - 1, combinedDir);

    // search along combined direction
    wts = mert.lineSearch(nbest, (Counter<String>) p[p.length - 1],
        dirs.get(p.length - 1), emetric);
    objValue = MERT.evalAtPoint(nbest, wts, emetric);
    System.err.printf("%d: Objective after combined search %e\n", iter,
        objValue);
  }

  return wts;
}
 
Example 5
Source File: PowellOptimizer.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Optimizes the weights by repeated sweeps of line searches along a set of
 * directions that starts as one unit direction per feature (in sorted
 * feature-name order). After each sweep the overall displacement is formed
 * as a combined direction; depending on the gains observed it may replace
 * the direction that produced the largest single gain, and a final line
 * search is always run along it. Stops when the objective after the combined
 * search differs from the objective after the first line search of the sweep
 * by less than {@code MERT.MIN_OBJECTIVE_DIFF}.
 *
 * @param initialWts starting weights; its keys define the search directions
 * @return the optimized weights, or {@code initialWts} itself when it has no keys
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public Counter<String> optimize(Counter<String> initialWts) {

  Counter<String> wts = initialWts;

  // initialize search directions
  List<Counter<String>> dirs = new ArrayList<Counter<String>>(
      initialWts.size());
  List<String> featureNames = new ArrayList<String>(wts.keySet());
  Collections.sort(featureNames);
  for (String featureName : featureNames) {
    Counter<String> dir = new ClassicCounter<String>();
    dir.incrementCount(featureName);
    dirs.add(dir);
  }

  // With no features there is nothing to search over, and dirs.get(0) below
  // would throw IndexOutOfBoundsException.
  if (dirs.isEmpty()) {
    return wts;
  }

  // main optimization loop
  Counter[] p = new ClassicCounter[dirs.size()];
  double objValue = MERT.evalAtPoint(nbest, wts, emetric); // obj value w/o
  // smoothing
  for (int iter = 0;; iter++) {
    // search along each direction
    p[0] = mert.lineSearch(nbest, wts, dirs.get(0), emetric);
    double eval = MERT.evalAtPoint(nbest, p[0], emetric);
    double biggestWin = Math.max(0, eval - objValue);
    System.err.printf("initial totalWin: %e (%e-%e)\n", biggestWin, eval,
        objValue);
    System.err.printf("apply @ wts: %e\n",
        MERT.evalAtPoint(nbest, wts, emetric));
    System.err.printf("apply @ p[0]: %e\n",
        MERT.evalAtPoint(nbest, p[0], emetric));
    objValue = eval;
    int biggestWinId = 0;
    double totalWin = biggestWin;
    // Baseline for the stopping test: the objective after the first line search.
    double initObjValue = objValue;
    for (int i = 1; i < p.length; i++) {
      p[i] = mert.lineSearch(nbest, (Counter<String>) p[i - 1], dirs.get(i),
          emetric);
      eval = MERT.evalAtPoint(nbest, p[i], emetric);
      // Track which direction yielded the largest single improvement.
      if (Math.max(0, eval - objValue) > biggestWin) {
        biggestWin = eval - objValue;
        biggestWinId = i;
      }
      // Only improvements count toward the total; losses are clamped to zero.
      totalWin += Math.max(0, eval - objValue);
      System.err.printf("\t%d totalWin: %e(%e-%e)\n", i, totalWin, eval,
          objValue);
      objValue = eval;
    }

    System.err.printf("%d: totalWin %e biggestWin: %e objValue: %e\n", iter,
        totalWin, biggestWin, objValue);

    // construct combined direction: (end point of the sweep) - (start point)
    Counter<String> combinedDir = new ClassicCounter<String>(wts);
    Counters.multiplyInPlace(combinedDir, -1.0);
    combinedDir.addAll(p[p.length - 1]);

    // check to see if we should replace the dominant 'win' direction
    // during the last iteration of search with the combined search direction
    // The test point is the sweep's end point moved once more by combinedDir.
    Counter<String> testPoint = new ClassicCounter<String>(p[p.length - 1]);
    testPoint.addAll(combinedDir);
    double testPointEval = MERT.evalAtPoint(nbest, testPoint, emetric);
    double extrapolatedWin = testPointEval - objValue;
    System.err.printf("Test Point Eval: %e, extrapolated win: %e\n",
        testPointEval, extrapolatedWin);
    if (extrapolatedWin > 0
        && 2 * (2 * totalWin - extrapolatedWin)
            * Math.pow(totalWin - biggestWin, 2.0) < Math.pow(
            extrapolatedWin, 2.0) * biggestWin) {
      System.err.printf(
          "%d: updating direction %d with combined search dir\n", iter,
          biggestWinId);
      MERT.normalize(combinedDir);
      dirs.set(biggestWinId, combinedDir);
    }

    // Search along combined dir even if replacement didn't happen
    wts = mert.lineSearch(nbest, p[p.length - 1], combinedDir, emetric);
    eval = MERT.evalAtPoint(nbest, wts, emetric);
    System.err.printf(
        "%d: Objective after combined search (gain: %e prior:%e)\n", iter,
        eval - objValue, objValue);

    objValue = eval;

    double finalObjValue = objValue;
    System.err.printf("Actual win: %e (%e-%e)\n", finalObjValue
        - initObjValue, finalObjValue, initObjValue);
    if (Math.abs(initObjValue - finalObjValue) < MERT.MIN_OBJECTIVE_DIFF)
      break; // changed to prevent infinite loops
  }

  return wts;
}
 
Example 6
Source File: MERT.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Worker loop: repeatedly takes a starting weight vector from
 * {@code startingPoints}, optimizes it with the configured batch optimizer,
 * optionally normalizes the result, evaluates it, records it via
 * {@code updateBest}, and prints a summary. Returns once no starting points
 * remain.
 */
@Override
public void run() {

  System.out.printf("\nthread started (%d): %s\n", startingPoints.size(),
      this);

  for (;;) {

    // Read the queue size and take the next point under one lock so the
    // derived point index matches the point actually taken.
    final int remaining;
    final Counter<String> wts;
    synchronized (startingPoints) {
      remaining = startingPoints.size();
      wts = startingPoints.poll();
    }
    if (wts == null) {
      return;
    }

    final int pointIndex = nInitialStartingPoints - remaining;

    // Make the seed a function of current starting point, to
    // ensure experiments are reproducible:
    List<Double> seedValues = new ArrayList<Double>(wts.values());
    Collections.sort(seedValues);
    seedValues.add(SEED * 1.0);
    long threadSeed = Arrays.hashCode(seedValues.toArray());
    this.random = new Random(threadSeed);

    System.out.printf("\npoint %d - initial wts: %s", pointIndex, wts.toString());
    System.out.printf("\npoint %d - seed: %d\n", pointIndex, threadSeed);

    BatchOptimizer optimizer = BatchOptimizerFactory.factory(optStr, pointIndex, this);
    System.err.println("using: " + optimizer.toString());

    // Make sure weights that shouldn't be optimized are not in wts:
    removeWts(wts, fixedWts);
    Counter<String> optimized = optimizer.optimize(wts);
    // Temporarily add them back before normalization:
    if (fixedWts != null) {
      optimized.addAll(fixedWts);
    }

    final Counter<String> finalWts;
    if (optimizer.doNormalization()) {
      System.err.printf("Normalizing weights\n");
      finalWts = normalize(optimized);
    } else {
      System.err.printf("Saving unnormalized weights\n");
      finalWts = optimized;
    }
    // Remove them again:
    removeWts(finalWts, fixedWts);

    double evalAt = evalAtPoint(nbest, finalWts, emetric);
    double mcmcEval = mcmcTightExpectedEval(nbest, finalWts, emetric);
    double mcmcEval2 = mcmcTightExpectedEval(nbest, bestWts, emetric, false);

    // Reported objective: the MCMC estimate when mcmcObj is set, otherwise
    // the negated direct evaluation.
    final double obj;
    if (mcmcObj) {
      obj = mcmcEval;
    } else {
      obj = -evalAt;
    }

    updateBest(finalWts, -evalAt);
    System.out.printf("\npoint %d - final wts: %s", pointIndex, finalWts.toString());
    System.out
        .printf(
            "\npoint %d - apply: %e E(apply): %e obj: %e best obj: %e (l1: %f)\n\n",
            pointIndex, evalAt, mcmcEval2, obj, bestObj, l1norm(finalWts));
  }
}