edu.stanford.nlp.stats.ClassicCounter Java Examples

The following examples show how to use edu.stanford.nlp.stats.ClassicCounter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DocumentFrequencyCounter.java    From wiseowl with MIT License 6 votes vote down vote up
/**
 * Builds a document-frequency style counter for the given document string:
 * each noun token occurrence (in sentences up to MAX_SENTENCE_LENGTH words)
 * increments that word's count.
 *
 * @param document raw document text; Gigaword heading-separator patterns are
 *          stripped before tokenization
 * @return a counter mapping noun word forms to their occurrence counts
 */
private static Counter<String> getIDFMapForDocument(String document) {
  // Clean up -- remove some Gigaword patterns that slow things down
  // / don't help anything
  document = headingSeparator.matcher(document).replaceAll("");

  DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document));
  preprocessor.setTokenizerFactory(tokenizerFactory);

  Counter<String> idfMap = new ClassicCounter<String>();
  for (List<HasWord> sentence : preprocessor) {
    // Skip overly long sentences: tagging them is slow and adds little signal.
    if (sentence.size() > MAX_SENTENCE_LENGTH) {
      continue;
    }

    List<TaggedWord> tagged = tagger.tagSentence(sentence);

    for (TaggedWord w : tagged) {
      // Keep only noun tokens. NOTE(review): this matches a lowercase "n"
      // prefix -- confirm the tagger's tag set is lowercase (Penn Treebank
      // noun tags are uppercase "NN*").
      if (w.tag().startsWith("n")) {
        idfMap.incrementCount(w.word());
      }
    }
  }

  return idfMap;
}
 
Example #2
Source File: ConvertWeights.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
@SuppressWarnings("unchecked")
public static void main(String[] args) {
  // Expect exactly one argument: the path to the serialized weights file.
  if (args.length != 1) {
    System.err.printf("Usage: java %s old_wts%n", ConvertWeights.class.getName());
    System.exit(-1);
  }
  final String filename = args[0];

  // Deserialize the legacy ClassicCounter-format weights.
  Counter<String> oldWeights = IOTools.deserialize(filename, ClassicCounter.class, 
      SerializationMode.DEFAULT);

  // Preserve the original file under a ".old" suffix before rewriting.
  Path backupPath = Paths.get(filename + ".old");
  try {
    Files.move(Paths.get(filename), backupPath);
  } catch (IOException e) {
    e.printStackTrace();
    System.exit(-1);
  }

  // Write the weights back to the original location in the new format.
  IOTools.writeWeights(filename, oldWeights);
  System.out.printf("Converted %s to new format (old file moved to %s)%n",
      filename, backupPath.toString());
}
 
Example #3
Source File: DependencyBnBPreorderer.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reads whitespace-tokenized text from the reader and returns the k most
 * frequent token types.
 *
 * @param reader line-oriented source of whitespace-separated tokens
 * @param k maximum number of token types to keep
 * @return the set of (at most) k most frequent token types
 * @throws IOException if reading from the input fails
 */
private static Set<String> getMostFrequentTokens(LineNumberReader reader, int k) throws IOException {
  
  Counter<String> tokenCounts = new ClassicCounter<String>();
  
  String line;
  while ((line = reader.readLine()) != null) {
    // Java-style array declaration (was C-style "String tokens[]").
    String[] tokens = line.split("\\s+");
    for (String t : tokens) {
      tokenCounts.incrementCount(t);
    }
  }

  // Prune the counter down to the k highest counts, then copy the survivors.
  // (The original also nulled the local counter afterwards -- a no-op, since
  // the local goes out of scope immediately.)
  Counters.retainTop(tokenCounts, k);
  return new HashSet<>(tokenCounts.keySet());
}
 
Example #4
Source File: MetricUtils.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Calculates the "informativeness" of each ngram, which is used by the NIST
 * metric. In Matlab notation, the informativeness of the ngram w_1:n is
 * defined as -log2(count(w_1:n)/count(w_1:n-1)).
 * 
 * @param ngramCounts
 *          ngram counts according to references
 * @param totWords
 *          total number of words, which is used to compute the
 *          informativeness of unigrams.
 * @return a counter mapping each ngram to its NIST information value
 */
static public <TK> Counter<Sequence<TK>> getNGramInfo(
    Counter<Sequence<TK>> ngramCounts, int totWords) {
  Counter<Sequence<TK>> ngramInfo = new ClassicCounter<Sequence<TK>>();

  for (Sequence<TK> ngram : ngramCounts.keySet()) {
    double num = ngramCounts.getCount(ngram);
    // Unigrams are normalized by the total word count; longer ngrams by the
    // count of their (n-1)-gram prefix.
    double denom = totWords;
    if (ngram.size() > 1) {
      Sequence<TK> ngramPrefix = ngram.subsequence(0,
          ngram.size() - 1);
      denom = ngramCounts.getCount(ngramPrefix);
    }
    // -log2(num/denom), computed as natural log divided by log(2).
    double inf = -Math.log(num / denom) / LOG2;
    ngramInfo.setCount(ngram, inf);
  }
  return ngramInfo;
}
 
Example #5
Source File: MetricUtils.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Compute maximum n-gram counts from one or more sequences.
 * 
 * @param sequences - The list of sequences.
 * @param seqWeights - Per-sequence weights multiplied into the counts, or
 *          null for uniform weight 1.0; must be parallel to {@code sequences}
 * @param maxOrder - The n-gram order.
 * @return for each ngram, the maximum (weighted) count observed in any
 *         single sequence
 */
static public <TK> Counter<Sequence<TK>> getMaxNGramCounts(
    List<Sequence<TK>> sequences, double[] seqWeights, int maxOrder) {
  // Validate the parallel-array contract before doing any counting work.
  if (seqWeights != null && seqWeights.length != sequences.size()) {
    throw new RuntimeException("Improper weight vector for sequences.");
  }
  Counter<Sequence<TK>> maxCounts = new ClassicCounter<Sequence<TK>>();
  maxCounts.setDefaultReturnValue(0.0);

  int seqId = 0;
  for (Sequence<TK> sequence : sequences) {
    Counter<Sequence<TK>> counts = getNGramCounts(sequence, maxOrder);
    // The weight is constant per sequence; hoist it out of the ngram loop.
    final double weight = seqWeights == null ? 1.0 : seqWeights[seqId];
    for (Sequence<TK> ngram : counts.keySet()) {
      double countValue = weight * counts.getCount(ngram);
      double currentMax = maxCounts.getCount(ngram);
      maxCounts.setCount(ngram, Math.max(countValue, currentMax));
    }
    ++seqId;
  }
  return maxCounts;
}
 
Example #6
Source File: OptimizerUtils.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a feature white list from an n-best list: features are kept when
 * their per-segment occurrence count passes Counters.keysAbove's threshold
 * of (minSegmentCount - 1).
 */
public static Set<String> featureWhiteList(FlatNBestList nbest, int minSegmentCount) {
  List<List<ScoredFeaturizedTranslation<IString, String>>> nbestlists = nbest.nbestLists();
  Counter<String> featureSegmentCounts = new ClassicCounter<String>();
  for (List<ScoredFeaturizedTranslation<IString, String>> nbestlist : nbestlists) {
    // Collect the distinct feature names firing anywhere in this segment's list.
    Set<String> segmentFeatures = new HashSet<String>();
    for (ScoredFeaturizedTranslation<IString, String> translation : nbestlist) {
      for (FeatureValue<String> feature : translation.features) {
        segmentFeatures.add(feature.name);
      }
    }
    // Each feature counts at most once per segment.
    for (String featureName : segmentFeatures) {
      featureSegmentCounts.incrementCount(featureName);
    }
  }
  return Counters.keysAbove(featureSegmentCounts, minSegmentCount - 1);
}
 
Example #7
Source File: OnlineTuner.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds one work item for a decoder thread: source sentences, their
 * references, a snapshot of the current weights, and decoding options.
 *
 * @param input source sentences for this batch
 * @param references reference translations, parallel to {@code input}
 * @param weights current weight vector; copied below, not stored by reference
 * @param translationIds ids of the translations in this batch
 * @param inputId id of this batch/work item
 * @param localTM translation model instance for this thread
 * @param createForcedAlignment whether to produce a forced alignment
 * @param additionalPrefixDecoding whether to also run prefix decoding
 */
public ProcessorInput(List<Sequence<IString>> input, 
    List<List<Sequence<IString>>> references, 
    Counter<String> weights, int[] translationIds, int inputId, 
    TranslationModel<IString,String> localTM, boolean createForcedAlignment,
    boolean additionalPrefixDecoding) {
  this.source = input;
  this.translationIds = translationIds;
  this.references = references;
  this.inputId = inputId;
  // Copy here for thread safety. DO NOT change this unless you know
  // what you're doing....
  this.weights = new ClassicCounter<String>(weights);
  this.localTM = localTM;
  this.createForcedAlignment = createForcedAlignment;
  this.additionalPrefixDecoding = additionalPrefixDecoding;
}
 
Example #8
Source File: KBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Featurizes a KBP relation instance: ensures RegexNER annotations, then
 * runs the dense, surface, dependency, and relation-specific featurizers.
 *
 * @param input the subject/object mention pair with its sentence
 * @return the feature counter; empty when the spans overlap or are empty
 */
public static Counter<String> features(KBPInput input) {
  // Ensure RegexNER Tags!
  input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false);
  input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true);

  // Degenerate spans yield no features; bail out before allocating the
  // counter and running the featurizers (the original allocated first).
  if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {
    return new ClassicCounter<>();
  }

  ClassicCounter<String> feats = new ClassicCounter<>();

  // Actually featurize
  denseFeatures(input, input.sentence, feats);
  surfaceFeatures(input, input.sentence, feats);
  dependencyFeatures(input, input.sentence, feats);
  relationSpecificFeatures(input, input.sentence, feats);

  return feats;
}
 
Example #9
Source File: KBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Featurizes a KBP relation instance: ensures RegexNER annotations, then
 * runs the dense, surface, dependency, and relation-specific featurizers.
 *
 * @param input the subject/object mention pair with its sentence
 * @return the feature counter; empty when the spans overlap or are empty
 */
public static Counter<String> features(KBPInput input) {
    // Ensure RegexNER tags. NOTE(review): the original paired the caseless
    // resource with ignoreCase=false and the cased resource with true --
    // inverted relative to the canonical KBPStatisticalExtractor, which
    // passes (CASED, false) and (CASELESS, true). Pair each resource with
    // its matching case flag.
    input.sentence.regexner(IntelConfig.Regex_NER_cased, false);
    input.sentence.regexner(IntelConfig.Regex_NER_caseless, true);

    // Get useful variables
    ClassicCounter<String> feats = new ClassicCounter<>();
    if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {
        return new ClassicCounter<>();
    }

    // Actually featurize
    denseFeatures(input, input.sentence, feats);
    surfaceFeatures(input, input.sentence, feats);
    dependencyFeatures(input, input.sentence, feats);
    relationSpecificFeatures(input, input.sentence, feats);

    return feats;
}
 
Example #10
Source File: MERT.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a weight vector with one uniformly-random value in [0, 1) per key,
 * logging the result to stderr.
 */
static Counter<String> randomWts(Set<String> keySet) {
  Counter<String> randomWeights = new ClassicCounter<String>();
  for (String feature : keySet) {
    randomWeights.setCount(feature, globalRandom.nextDouble());
  }
  System.err.printf("random Wts: %s%n", randomWeights);
  return randomWeights;
}
 
Example #11
Source File: ComputeBitextIDF.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads a whitespace-tokenized bitext from stdin and writes one
 * "word <TAB> idf" line per word type to stdout, where idf is
 * log(nDocuments / documentFrequency).
 *
 * @param args unused; passing any argument prints usage and exits
 */
public static void main(String[] args) {
  if (args.length > 0) {
    System.err.printf("Usage: java %s < files > idf-file%n", ComputeBitextIDF.class.getName());
    System.exit(-1);
  }

  Counter<String> documentsPerTerm = new ClassicCounter<String>(1000000);
  double nDocuments = 0.0;
  // try-with-resources guarantees the reader is closed on every path
  // (the original leaked it when an IOException was thrown mid-read).
  try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in))) {
    for (String line; (line = reader.readLine()) != null;) {
      String[] tokens = line.trim().split("\\s+");
      Set<String> seen = new HashSet<String>(tokens.length);
      for (String token : tokens) {
        // Set.add returns false for duplicates: count each type once per line.
        if (seen.add(token)) {
          documentsPerTerm.incrementCount(token);
        }
      }
    }
    nDocuments = reader.getLineNumber();
  } catch (IOException e) {
    e.printStackTrace();
  }

  // Output the idfs
  System.err.printf("Bitext contains %d sentences and %d word types%n", (int) nDocuments, documentsPerTerm.keySet().size());
  for (String wordType : documentsPerTerm.keySet()) {
    double count = documentsPerTerm.getCount(wordType);
    System.out.printf("%s\t%f%n", wordType, Math.log(nDocuments / count));
  }
  System.out.printf("%s\t%f%n", UNK_TOKEN, Math.log(nDocuments / 1.0));
}
 
Example #12
Source File: TargetFunctionWordInsertion.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads a "word count" file (two whitespace-separated fields per line) and
 * returns the rankCutoff most frequent word types, excluding
 * numeric/punctuation/symbol tokens and the sentence-boundary markers.
 *
 * @param filename path to the counts file
 * @return the retained word types (also echoed to stderr)
 * @throws RuntimeException wrapping any IOException from reading
 */
private Set<IString> loadCountsFile(String filename) {
  Counter<IString> counter = new ClassicCounter<IString>();
  // try-with-resources: the original leaked the reader whenever an
  // IOException escaped (it was rethrown as RuntimeException without close).
  try (LineNumberReader reader = IOTools.getReaderFromFile(filename)) {
    for (String line; (line = reader.readLine()) != null;) {
      String[] fields = line.trim().split("\\s+");
      if (fields.length == 2) {
        String wordType = fields[0];
        // Skip numbers/punctuation/symbols and start/end sentence markers.
        if ( ! (TokenUtils.isNumericOrPunctuationOrSymbols(wordType) ||
                wordType.equals(TokenUtils.START_TOKEN.toString()) ||
                wordType.equals(TokenUtils.END_TOKEN.toString()))) {
          counter.setCount(new IString(wordType), Double.valueOf(fields[1]));
        }
      } else {
        System.err.printf("%s: Discarding line %s%n", this.getClass().getName(), line);
      }
    }
    Set<IString> set = new HashSet<>(Counters.topKeys(counter, rankCutoff));
    for (IString word : set) {
      System.err.printf(" %s%n", word);
    }
    return set;
    
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #13
Source File: FeatureValues.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Convert a collection of feature values to a counter.
 * 
 * @param featureValues the feature/value pairs to aggregate
 * @return a counter summing the values of identically-named features
 */
public static <T> Counter<T> toCounter(Collection<FeatureValue<T>> featureValues) {
  Counter<T> result = new ClassicCounter<T>();
  for (FeatureValue<T> featureValue : featureValues) {
    result.incrementCount(featureValue.name, featureValue.value);
  }
  return result;
}
 
Example #14
Source File: IOTools.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Read weights from a plain text file with one "name value" pair per line.
 * 
 * @param filename path to the weights file
 * @return a counter of the parsed weights
 * @throws IOException if the file cannot be read or a line is malformed
 */
public static Counter<String> readWeightsPlain(String filename) throws IOException {
  Counter<String> wts = new ClassicCounter<String>();
  // try-with-resources replaces the manual close calls and also closes the
  // reader when Double.parseDouble throws (a path the original leaked on).
  try (LineNumberReader reader = new LineNumberReader(new FileReader(filename))) {
    for (String line; (line = reader.readLine()) != null;) {
      String[] input = line.split(" ");
      if (input.length != 2) {
        throw new IOException("Illegal input in weight file " + filename + ": " + line);
      }
      wts.setCount(input[0], Double.parseDouble(input[1]));
    }
  }
  return wts;
}
 
Example #15
Source File: IOTools.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Read weights from a file. Supports both binary and text formats.
 * 
 * TODO(spenceg) Replace ClassicCounter with our own SparseVector implementation.
 * 
 * @param filename
 * @param featureIndex index to register weight names in; may be null
 * @return a counter of weights (empty if deserialization yields null)
 * @throws IOException 
 */
@SuppressWarnings("unchecked")
public static Counter<String> readWeights(String filename,
    Index<String> featureIndex) {
  Counter<String> wts = (Counter<String>) deserialize(filename, ClassicCounter.class, SerializationMode.BIN_GZ);
  if (wts == null) {
    wts = new ClassicCounter<>();
  }
  if (featureIndex != null) {
    // Register every weight name in the shared feature index.
    for (String featureName : wts.keySet()) {
      featureIndex.addToIndex(featureName);
    }
  }
  return wts;
}
 
Example #16
Source File: Summarizer.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Counts term frequencies over all tokens of all sentences.
 *
 * @param sentences annotated sentences carrying a TokensAnnotation
 * @return a counter from token text to its total occurrence count
 */
private static Counter<String> getTermFrequencies(List<CoreMap> sentences) {
  Counter<String> ret = new ClassicCounter<String>();

  // Braces added to the nested loops (the original left both unbraced).
  for (CoreMap sentence : sentences) {
    for (CoreLabel cl : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      ret.incrementCount(cl.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  return ret;
}
 
Example #17
Source File: DocumentFrequencyCounter.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Builds per-file IDF counters in parallel, merges them, and serializes the
 * combined counter to OUT_FILE.
 *
 * @param args input file paths, one IDF-building task per file
 */
public static void main(String[] args) throws InterruptedException, ExecutionException,
  IOException {
  ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
  List<Future<Counter<String>>> futures = new ArrayList<Future<Counter<String>>>();

  for (String filePath : args)
    futures.add(pool.submit(new FileIDFBuilder(new File(filePath))));

  int finished = 0;
  Counter<String> overall = new ClassicCounter<String>();

  for (Future<Counter<String>> future : futures) {
    System.err.printf("%s: Polling future #%d / %d%n",
        dateFormat.format(new Date()), finished + 1, args.length);
    Counter<String> result = future.get();
    finished++;
    System.err.printf("%s: Finished future #%d / %d%n",
        dateFormat.format(new Date()), finished, args.length);

    System.err.printf("\tMerging counter.. ");
    overall.addAll(result);
    System.err.printf("done.%n");
  }
  pool.shutdown();

  System.err.printf("\n%s: Saving to '%s'.. ", dateFormat.format(new Date()),
      OUT_FILE);
  // try-with-resources: the original never closed (or flushed) the stream,
  // so the serialized counter could be truncated or empty on disk.
  try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(OUT_FILE))) {
    oos.writeObject(overall);
  }
  System.err.printf("done.%n");
}
 
Example #18
Source File: MosesCompoundSplitter.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the compound-splitter model file (tab-separated: id, true-cased word,
 * count), keeping for each lowercased form its most frequent casing, and
 * optionally precomputing log unigram probabilities.
 *
 * @param modelFileName path to the model file
 * @throws IOException if the file cannot be read or a line is malformed
 */
private void loadModel(String modelFileName) throws IOException {
  System.err.println("Loading MosesCompoundSplitter from " + modelFileName);

  lcModel = new ClassicCounter<String>();
  trueCase = new HashMap<>();
  double totalCount = 0.0;
  if(useUnigramProbs) probs = new ClassicCounter<String>();

  int minCnt = Math.min(MAX_COUNT, MIN_COUNT);

  // try-with-resources closes the reader on all paths; the original leaked
  // it when Long.parseLong threw on a malformed count field.
  try (LineNumberReader reader = new LineNumberReader(new FileReader(modelFileName))) {
    for (String line; (line = reader.readLine()) != null;) {
      String[] input = line.split("\t");
      if(input.length != 3) {
        throw new IOException("Illegal input in model file, line " + reader.getLineNumber() + ": " + line);
      }
      long cnt = Long.parseLong(input[2]);
      totalCount += cnt;
      String tc = input[1];
      if(cnt < minCnt || tc.length() < MIN_SIZE + 1) continue; // these will never be used for splitting anyway

      String lc = tc.toLowerCase();
      // use the most frequent casing
      if(lcModel.getCount(lc) < cnt) {
        lcModel.setCount(lc, cnt);
        trueCase.put(lc, tc);
      }
    }
  }

  // Convert to log space; unigram probs are log(count) - log(totalCount).
  totalCount = Math.log(totalCount);
  if(useUnigramProbs) {
    for(Entry<String, Double> e : lcModel.entrySet()) {
      probs.setCount(e.getKey(), Math.log(e.getValue()) - totalCount);
    }
  }
}
 
Example #19
Source File: RepetitionRate.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the incremental repetition-rate state: an empty ngram counter,
 * an empty corpus, and zeroed per-order totals for orders 1..maxNgramOrder.
 */
RepetitionRateIncrementalMetric() {
  ngrams = new ClassicCounter<Sequence<TK>>();
  corpus = new ArrayList<Sequence<TK>>();
  
  // One zero-initialized slot per ngram order in each of the four tallies.
  for(int i = 0; i < maxNgramOrder; ++i) {
    totalNonSingletonNgrams.add(0);
    totalNgrams.add(0);
    windowNonSingletonNgrams.add(0);
    windowNgrams.add(0);
  }
  
}
 
Example #20
Source File: KBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the dense (always-firing) indicator features: the subject/object
 * entity-type signature and the subject/object relative order.
 */
@SuppressWarnings("UnusedParameters")
private static void denseFeatures(KBPInput input, Sentence sentence, ClassicCounter<String> feats) {
    // Entity type signature, e.g. "PERSON,ORGANIZATION".
    indicator(feats, "type_signature", input.subjectType + "," + input.objectType);

    // Does the subject mention precede the object mention?
    String subjectFirst = input.subjectSpan.isBefore(input.objectSpan) ? "y" : "n";
    indicator(feats, "subj_before_obj", subjectFirst);
}
 
Example #21
Source File: MetricUtils.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Extracts counts of all n-grams of order 1..maxOrder from a sequence.
 * 
 * @param <TK> token type
 * @param sequence the token sequence to scan
 * @param maxOrder maximum n-gram length
 * @return a counter over every subsequence of length 1..maxOrder
 */
static public <TK> Counter<Sequence<TK>> getNGramCounts(Sequence<TK> sequence, int maxOrder) {
  Counter<Sequence<TK>> counts = new ClassicCounter<>();
  final int length = sequence.size();
  for (int start = 0; start < length; start++) {
    // End index is exclusive; cap the window at maxOrder tokens.
    final int maxEnd = Math.min(length, start + maxOrder);
    for (int end = start + 1; end <= maxEnd; end++) {
      counts.incrementCount(sequence.subsequence(start, end));
    }
  }
  return counts;
}
 
Example #22
Source File: NISTMetric.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the translation at {@code index}, retracting the old
 * translation's clipped ngram statistics and adding the new one's.
 *
 * @param index position to replace; must be < sequences.size()
 * @param trans the replacement translation, or null to remove
 * @return this metric, updated in place
 * @throws IndexOutOfBoundsException if index is out of range
 */
@Override
public IncrementalEvaluationMetric<TK, FV> replace(int index,
    ScoredFeaturizedTranslation<TK, FV> trans) {
  // Guard must reject index == sequences.size() too: every access below
  // (get/set) requires index < size(). The original tested "index > size()",
  // letting the boundary case fall through to an unchecked
  // IndexOutOfBoundsException from List.get -- and its own message already
  // said ">=".
  if (index >= sequences.size()) {
    throw new IndexOutOfBoundsException(String.format("Index: %d >= %d",
        index, sequences.size()));
  }
  // Clipped ngram counts for the replacement (empty when removing).
  Counter<Sequence<TK>> candidateCounts = (trans == null ? new ClassicCounter<Sequence<TK>>()
      : MetricUtils.getNGramCounts(trans.translation, order));
  MetricUtils.clipCounts(candidateCounts, maxReferenceCounts.get(index));
  if (sequences.get(index) != null) {
    // Retract the statistics contributed by the old translation at index.
    Counter<Sequence<TK>> oldCandidateCounts = MetricUtils.getNGramCounts(
        sequences.get(index), order);
    MetricUtils.clipCounts(oldCandidateCounts, maxReferenceCounts.get(index));
    decCounts(oldCandidateCounts, sequences.get(index));
    c -= sequences.get(index).size();
    r -= averageReferenceLength(index);
  }
  sequences.set(index, (trans == null ? null : trans.translation));
  if (trans != null) {
    // Add the statistics for the new translation.
    incCounts(candidateCounts, trans.translation);
    c += sequences.get(index).size();
    r += averageReferenceLength(index);
  }

  return this;
}
 
Example #23
Source File: NISTMetric.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the NIST ngram information weights from all reference
 * translations and stores them in {@code ngramInfo}.
 */
private void initNgramWeights(List<List<Sequence<TK>>> referencesList) {
  int totalLength = 0;
  Counter<Sequence<TK>> allNgrams = new ClassicCounter<Sequence<TK>>();
  for (List<Sequence<TK>> references : referencesList) {
    for (Sequence<TK> reference : references) {
      totalLength += reference.size();
      // Accumulate this reference's ngram counts into the global tally.
      addToCounts(allNgrams, MetricUtils.getNGramCounts(reference, order));
    }
  }
  ngramInfo = MetricUtils.getNGramInfo(allNgrams, totalLength);
}
 
Example #24
Source File: MERT.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sums every feature's values across all translations in the list.
 *
 * @param trans the translations whose features are aggregated
 * @return a counter of feature name to summed value
 */
public static Counter<String> summarizedAllFeaturesVector(
    List<ScoredFeaturizedTranslation<IString, String>> trans) {
  Counter<String> featureSums = new ClassicCounter<String>();
  for (ScoredFeaturizedTranslation<IString, String> translation : trans) {
    for (FeatureValue<String> feature : translation.features) {
      featureSums.incrementCount(feature.name, feature.value);
    }
  }
  return featureSums;
}
 
Example #25
Source File: OneSidedObjectiveFunction.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructor. Initializes this objective's delta data structures relative
 * to the given clusterer state.
 * 
 * @param input the clusterer state this objective starts from
 */
public OneSidedObjectiveFunction(ClustererState input) {
  // Setup delta data structures
  this.inputState = input;
  localWordToClass = new HashMap<>(input.vocabularySubset.size());
  deltaClassCount = new ClassicCounter<Integer>(input.numClasses);
  deltaClassHistoryCount = new TwoDimensionalCounter<Integer,NgramHistory>();
  // Seed the local word -> class assignment from the shared input state.
  for (IString word : input.vocabularySubset) {
    int classId = input.wordToClass.get(word);
    localWordToClass.put(word, classId);
  }
  // Start from the input state's objective; deltas are applied on top of it.
  this.objValue = input.currentObjectiveValue;
}
 
Example #26
Source File: OptimizerUtils.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Aggregates a collection of feature values into a counter, summing values
 * for identically-named features.
 */
public static <T> Counter<T> featureValueCollectionToCounter(Collection<FeatureValue<T>> c) {
  Counter<T> totals = new ClassicCounter<T>();
  for (FeatureValue<T> featureValue : c) {
    totals.incrementCount(featureValue.name, featureValue.value);
  }
  return totals;
}
 
Example #27
Source File: OptimizerUtils.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Packs parallel arrays of feature names and weight values into a counter.
 *
 * @param weightNames feature names
 * @param wtsArr weight values, parallel to {@code weightNames}
 * @return a counter mapping each name to its weight
 * @throws IllegalArgumentException if the arrays differ in length (the
 *           original silently ignored extra weights or threw an unchecked
 *           ArrayIndexOutOfBoundsException)
 */
public static Counter<String> getWeightCounterFromArray(String[] weightNames,
    double[] wtsArr) {
  if (weightNames.length != wtsArr.length) {
    throw new IllegalArgumentException("Parallel arrays differ in length: "
        + weightNames.length + " names vs. " + wtsArr.length + " weights");
  }
  Counter<String> wts = new ClassicCounter<String>();
  for (int i = 0; i < weightNames.length; i++) {
    wts.setCount(weightNames[i], wtsArr[i]);
  }
  return wts;
}
 
Example #28
Source File: AdaGradFOBOSUpdater.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates an AdaGrad updater with FOBOS-style proximal regularization.
 *
 * @param initialRate base learning rate
 * @param expectedNumFeatures sizing hint for the squared-gradient accumulator
 * @param lambda regularization strength
 * @param norm which norm the proximal step applies
 * @param customL1 per-feature L1 strengths -- presumably overriding lambda
 *          for listed features; TODO(review) confirm against the update code
 * @param fixedFeatures features excluded from updates -- assumed; TODO confirm
 */
public AdaGradFOBOSUpdater(double initialRate, int expectedNumFeatures, double lambda, Norm norm, Counter<String> customL1, Set<String> fixedFeatures) {
  this.rate = initialRate;
  this.lambda = lambda;
  this.norm = norm;
  this.customL1 = customL1;
  this.fixedFeatures = fixedFeatures;
  
  // Accumulates the per-feature sum of squared gradients for AdaGrad scaling.
  sumGradSquare = new ClassicCounter<String>(expectedNumFeatures);
}
 
Example #29
Source File: AbstractOnlineOptimizer.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the batch gradient as the sum of per-sentence unregularized
 * gradients, plus (optionally) the L2 regularization term scaled by the
 * fraction of the tuning set present in this batch.
 *
 * @param weights current weight vector
 * @param sources source sentences of the batch
 * @param sourceIds ids of the batch sentences
 * @param translations n-best translations per sentence (empty = decoder failure)
 * @param references reference translations per sentence
 * @param referenceWeights per-reference weights
 * @param scoreMetric sentence-level metric used by the per-sentence gradient
 * @return the accumulated batch gradient
 */
@Override
public Counter<String> getBatchGradient(Counter<String> weights,
    List<Sequence<IString>> sources, int[] sourceIds,
    List<List<RichTranslation<IString, String>>> translations,
    List<List<Sequence<IString>>> references,
    double[] referenceWeights,
    SentenceLevelMetric<IString, String> scoreMetric) {
  Counter<String> batchGradient = new ClassicCounter<String>();

  for (int i = 0; i < sourceIds.length; i++) {
    if (translations.get(i).size() > 0) {
      // Skip decoder failures.
      Counter<String> unregularizedGradient = getUnregularizedGradient(weights, sources.get(i), sourceIds[i], translations.get(i), references.get(i), referenceWeights, scoreMetric);
      batchGradient.addAll(unregularizedGradient);
    }
  }

  // Add L2 regularization directly into the derivative.
  if (this.l2Regularization) {
    // Snapshot the key set to avoid mutating while iterating. (The original
    // then re-added weights.keySet() to its own copy -- a no-op, removed.)
    final Set<String> features = new HashSet<String>(weights.keySet());
    final double dataFraction = sourceIds.length / (double) tuneSetSize;
    final double scaledInvSigmaSquared = dataFraction / (2 * sigmaSq);
    for (String key : features) {
      double x = weights.getCount(key);
      batchGradient.incrementCount(key, x * scaledInvSigmaSquared);
    }
  }

  return batchGradient;
}
 
Example #30
Source File: KBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the dense (always-firing) indicator features: the subject/object
 * entity-type signature and the subject/object relative order.
 */
@SuppressWarnings("UnusedParameters")
private static void denseFeatures(KBPInput input, Sentence sentence, ClassicCounter<String> feats) {
  // Whether the subject mention precedes the object mention in the sentence.
  boolean subjBeforeObj = input.subjectSpan.isBefore(input.objectSpan);

  // Type signature
  indicator(feats, "type_signature", input.subjectType + "," + input.objectType);

  // Relative position
  indicator(feats, "subj_before_obj", subjBeforeObj ? "y" : "n");
}