package edu.uw.easysrl.semantics.lexicon; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.ImmutableSet; import edu.uw.easysrl.dependencies.Coindexation; import edu.uw.easysrl.dependencies.ResolvedDependency; import edu.uw.easysrl.dependencies.SRLFrame; import edu.uw.easysrl.dependencies.SRLFrame.SRLLabel; import edu.uw.easysrl.semantics.AtomicSentence; import edu.uw.easysrl.semantics.ConnectiveSentence; import edu.uw.easysrl.semantics.ConnectiveSentence.Connective; import edu.uw.easysrl.semantics.Constant; import edu.uw.easysrl.semantics.LambdaExpression; import edu.uw.easysrl.semantics.Logic; import edu.uw.easysrl.semantics.OperatorSentence; import edu.uw.easysrl.semantics.OperatorSentence.Operator; import edu.uw.easysrl.semantics.QuantifierSentence; import edu.uw.easysrl.semantics.QuantifierSentence.Quantifier; import edu.uw.easysrl.semantics.SemanticType; import edu.uw.easysrl.semantics.Sentence; import edu.uw.easysrl.semantics.SkolemTerm; import edu.uw.easysrl.semantics.Variable; import edu.uw.easysrl.syntax.grammar.Category; import edu.uw.easysrl.syntax.grammar.Combinator.RuleType; import edu.uw.easysrl.syntax.grammar.SyntaxTreeNode; import edu.uw.easysrl.syntax.parser.SRLParser.CCGandSRLparse; /** * Automatically constructs a default semantics for a word based on its category * */ public class DefaultLexicon extends Lexicon { private static final Constant ANSWER = new Constant("ANSWER", SemanticType.E); /** * Useful to distinguish auxiliary and implicative verbs, which have the same category (S\NP)/(S\NP). */ private final ImmutableSet<String> auxiliaryVerbs = ImmutableSet.of("be", "do", "have", "go"); private final String ARG = "ARG"; // POS tags for content words. private final static Set<String> contentPOS = ImmutableSet.of("NN", "VB", "JJ", "RB", "PR", "RP", "IN", "PO"); private final static List<SRLLabel> noLabels = Arrays.asList(SRLFrame.NONE, SRLFrame.NONE, SRLFrame.NONE, SRLFrame.NONE, SRLFrame.NONE, SRLFrame.NONE); @Override public Logic getEntry(final String word, final String pos, final Category category, final Coindexation coindexation, final Optional<CCGandSRLparse> parse, final int wordIndex) { final String lemma = getLemma(word, pos, parse, wordIndex); if (Category.valueOf("conj|conj").matches(category)) { // Special case, since we have other rules to deal with this non-compositionally. return new Constant(lemma, SemanticType.E); } final List<SRLLabel> labels; if (parse.isPresent()) { final List<ResolvedDependency> deps = parse.get().getOrderedDependenciesAtPredicateIndex(wordIndex); // Find the semantic role for each argument of the category. labels = deps.stream().map(x -> x == null ? SRLFrame.NONE : x.getSemanticRole()) .collect(Collectors.toList()); } else { labels = noLabels; } final HeadAndArguments headAndArguments = new HeadAndArguments(category, coindexation); return LambdaExpression.make( getEntry(lemma, category, coindexation, headAndArguments.argumentVariables, headAndArguments.headVariable, null, headAndArguments.coindexationIDtoVariable, isContentWord(lemma, pos, category), labels), headAndArguments.argumentVariables); } private boolean isContentWord(final String word, final String pos, final Category category) { if (!(pos.length() > 1 && contentPOS.contains(pos.substring(0, 2)))) { return false; } else if (auxiliaryVerbs.contains(word) && (Category.valueOf("(S\\NP)/(S\\NP)").matches(category) || Category.valueOf("(S[q]/(S\\NP))/NP") .matches(category))) { // Auxiliary verbs return false; } else { return true; } } /** * Recursively builds up logical form, add interpretations for arguments from right to left. */ private Logic getEntry(final String word, final Category category, final Coindexation coindexation, final List<Variable> vars, final Variable head, final Sentence resultSoFar, final Map<Coindexation.IDorHead, Variable> idToVar, final boolean isContentWord, final List<SRLLabel> labels) { SRLLabel label = labels.size() == 0 ? SRLFrame.NONE : labels.get(labels.size() - 1); if (category.getNumberOfArguments() > 0 && category.getLeft().isModifier() && labels.size() > 1 && labels.get(labels.size() - 2) != SRLFrame.NONE) { // For transitive modifier categories, like (S\S)/NP or ((S\NP)\(S\NP)/NP move the semantic role to the // argument. // Hacky, but simplifies things a lot. label = labels.get(labels.size() - 2); labels.set(labels.size() - 2, SRLFrame.NONE); } if (Category.valueOf("(NP|NP)|(NP|NP)").matches(category) || Category.valueOf("(PP|PP)|(PP|PP)").matches(category)) { // Hate these categories soo much. return head; } // A label for this argument, either a semantic role or "ARG". final String argumentLabel = (label == SRLFrame.NONE) ? ARG : label.toString(); if (category.getNumberOfArguments() == 0) { // Base case. N, S, NP etc. if (Category.N.matches(category) || Category.S.matches(category.getArgument(0))) { return ConnectiveSentence .make(Connective.AND, resultSoFar != null && (word == null || !isContentWord) ? null : new AtomicSentence(word, head), resultSoFar); } else if (category == Category.NP || category == Category.PP) { if (resultSoFar == null) { // Pronouns, named-entities, etc. are just constants. return new Constant(word, SemanticType.E); } // NP/N --> sk(#x . p(x)) return new SkolemTerm(new LambdaExpression(resultSoFar, head)); } else if (category == Category.PR) { return new Constant(word, SemanticType.E); } else { return new Constant(word, SemanticType.E); } } else if (isFunctionIntoEntityModifier(category) && category.getNumberOfArguments() == 1) { if (resultSoFar == null) { // PP/NP --> #x . x return head; } else { // Functions into NP\NP get special treatment. // (NP\NP)/(S\NP) --> #p#x . sk(#y . p(y) & x=y) final Variable y = new Variable(SemanticType.E); return new SkolemTerm(new LambdaExpression(ConnectiveSentence.make(Connective.AND, new AtomicSentence( equals, head, y), resultSoFar), y)); } } else { final Variable predicate = vars.get(0); if (category.isModifier() && coindexation.isModifier()) { // Modifier categories. // (S\NP)/(S\NP) --> #p#x#e . lemma(e) & p(x,e) // S/S --> #p#e . lemma(e) & p(e) // (S/S)/(S/S) --> #p#q#e . lemma(e) & p(e) & q(p, e) final Sentence modifier; final AtomicSentence px = new AtomicSentence(predicate, vars.subList(1, vars.size())); if (resultSoFar != null || word == null || !isContentWord) { // Non-content word modifiers modifier = null; } else if (label == SRLFrame.NONE) { // No semantic role: foo(e) modifier = new AtomicSentence(word, head); } else if (label == SRLFrame.NEG) { // Special case negation return ConnectiveSentence.make(Connective.AND, new OperatorSentence(Operator.NOT, px), resultSoFar); } else if (label.isCoreArgument()) { // Core Semantic role: "raging bull" --> #x . bull(x) & \exists e[rage(e) & AO(x,e)] final Variable event = new Variable(SemanticType.Ev); modifier = new QuantifierSentence(Quantifier.EXISTS, event, ConnectiveSentence.make(Connective.AND, new AtomicSentence(word, event), new AtomicSentence(argumentLabel, head, event))); } else { // Adjunct Semantic role: TMP(yesterday, e) modifier = new AtomicSentence(argumentLabel, new Constant(word, SemanticType.E), head); } return ConnectiveSentence.make(Connective.AND, px, modifier, resultSoFar); } else { return getEntryComplexCategories(word, category, coindexation, vars, head, resultSoFar, idToVar, isContentWord, labels, label, argumentLabel); } } } private Logic getEntryComplexCategories(final String word, final Category category, final Coindexation coindexation, final List<Variable> vars, final Variable head, final Sentence resultSoFar, final Map<Coindexation.IDorHead, Variable> idToVar, final boolean isContentWord, final List<SRLLabel> labels, final SRLLabel label, final String argumentLabel) { // Other complex categories. final Category argument = category.getRight(); final Variable predicate = vars.get(0); Sentence resultForArgument; if (isWhQuestion(category) && (argument.equals(Category.NP) || argument.equals(Category.PP))) { // wh-questions. The NP argument is the answer. resultForArgument = ConnectiveSentence.make(Connective.AND, new AtomicSentence(equals, predicate, ANSWER), resultSoFar); } else if (isDuplicateArgument(coindexation)) { // Avoid creating duplicate argument. resultForArgument = resultSoFar; } else if (argument.getNumberOfArguments() == 0) { resultForArgument = getEntryComplexCategoriesWithAtomicArgument(word, category, head, resultSoFar, isContentWord, label, argumentLabel, predicate); } else if (Category.valueOf("NP|NP").matches(argument) || Category.valueOf("NP|N").matches(argument) || Category.valueOf("(N\\N)/NP").matches(argument) || Category.valueOf("(NP|NP)|(NP|NP)").matches(argument)) { // Very rare and weird (NP/NP) arguments. Discard these. resultForArgument = resultSoFar; } else { resultForArgument = getEntryComplexCategoryWithComplexArgument(category, coindexation, head, resultSoFar, idToVar, argumentLabel, predicate); } // Recursively build up the next interpretation for the other arguments. return getEntry(word, category.getLeft(), coindexation.getLeft(), vars.subList(1, vars.size()), head, resultForArgument, idToVar, isContentWord, labels.subList(0, labels.size() - 1)); } private Sentence getEntryComplexCategoryWithComplexArgument(final Category category, final Coindexation coindexation, final Variable head, final Sentence resultSoFar, final Map<Coindexation.IDorHead, Variable> idToVar, final String argumentLabel, final Variable predicate) { final Category argument = category.getRight(); Sentence resultForArgument; // Complex argument, e.g. ((S[dcl]\NP)/NP_1)/(S[to]\NP_1) // Build up a list of the arguments of the argument. If these arguments aren't supplied elsewhere, // we // need to quantify them. final List<Logic> argumentsOfArgument = new ArrayList<>(argument.getNumberOfArguments()); final List<Variable> toQuantify = new ArrayList<>(); Coindexation coindexationOfArgument = coindexation.getRight(); for (int i = argument.getNumberOfArguments(); i > 0; i--) { // Iterate over arguments of argument. Logic argumentSemantics; final Coindexation.IDorHead id = coindexationOfArgument.getRight().getID(); if (idToVar.containsKey(id)) { // Argument is co-indexed with another argument. final Variable coindexedVar = idToVar.get(id); final SemanticType variableType = coindexedVar.getType(); final SemanticType expectedType = SemanticType.makeFromCategory(argument.getArgument(i)); if (variableType == SemanticType.EtoT && expectedType == SemanticType.E) { // NP argument coindexed with N if (isWhQuestion(category)) { // Question where the N is the answer, as in Which dog barks? // sk(#x.p(x) & x=ANSWER) final Variable x = new Variable(SemanticType.E); argumentSemantics = new SkolemTerm(new LambdaExpression(ConnectiveSentence.make(Connective.AND, new AtomicSentence(coindexedVar, x), new AtomicSentence(equals, x, ANSWER)), x)); } else { // sk(#x.p(x)) argumentSemantics = makeSkolem(coindexedVar); } } else { argumentSemantics = idToVar.get(id); } } else if (isWhQuestion(category) && argument.getArgument(i) == Category.NP && resultSoFar == null) { // Categories like: S[wq]/(S[dcl\NP) argumentSemantics = ANSWER; } else { // Not coindexed - make a new variable, and quantify it later. final Category argumentOfArgument = argument.getArgument(i); final Variable var = new Variable(SemanticType.makeFromCategory(argumentOfArgument)); toQuantify.add(var); argumentSemantics = var; } argumentsOfArgument.add(argumentSemantics); coindexationOfArgument = coindexationOfArgument.getLeft(); } // We need to add an extra variable for functions into N and S // e.g. an S\NP argument p needs arguments p(x, e) if (argument.getArgument(0) == Category.N || Category.S.matches(argument.getArgument(0))) { final Coindexation.IDorHead id = coindexationOfArgument.getID(); if (idToVar.containsKey(id)) { argumentsOfArgument.add(idToVar.get(id)); } else { final Variable var = new Variable(argument.getArgument(0) == Category.N ? SemanticType.E : SemanticType.Ev); toQuantify.add(var); argumentsOfArgument.add(var); } } Sentence argumentSemantics = new AtomicSentence(predicate, argumentsOfArgument); if (toQuantify.contains(argumentsOfArgument.get(argumentsOfArgument.size() - 1)) && !argumentsOfArgument.contains(head)) { // Link the head of the argument back to the head of the construction. argumentSemantics = ConnectiveSentence.make(Connective.AND, argumentSemantics, new AtomicSentence( argumentLabel, argumentsOfArgument.get(argumentsOfArgument.size() - 1), head)); } // Existentially quantify any missing variables for (final Variable var : toQuantify) { argumentSemantics = new QuantifierSentence(Quantifier.EXISTS, var, argumentSemantics); } resultForArgument = ConnectiveSentence.make(Connective.AND, argumentSemantics, resultSoFar); return resultForArgument; } private Sentence getEntryComplexCategoriesWithAtomicArgument(final String word, final Category category, final Variable head, final Sentence resultSoFar, final boolean isContentWord, final SRLLabel label, final String argumentLabel, final Variable predicate) { final Category argument = category.getRight(); Sentence resultForArgument; // Atomic argument, e.g. S\NP if (argument.equals(Category.NP) || argument.equals(Category.PP)) { if (category.isFunctionIntoModifier()) { // No semantic role --> on:(S\S)/NP --> #x#p#e . p(e) & on(x,e) // With semantic role --> on:(S\S)/NP --> #x#p#e . p(e) & TMP(x,e) resultForArgument = ConnectiveSentence.make(Connective.AND, new AtomicSentence( label == SRLFrame.NONE ? word : argumentLabel, predicate, head), resultSoFar); } else { // (S\NP)/NP --> ... & arg(y, x) resultForArgument = isContentWord ? ConnectiveSentence.make(Connective.AND, new AtomicSentence( argumentLabel, predicate, head), resultSoFar) : resultSoFar; } } else if (Category.N.matches(argument)) { if (head.getType() == SemanticType.Ev) { // S/N --> #p#e . ARG(sk(#x.p(x)),e) resultForArgument = ConnectiveSentence.make(Connective.AND, new AtomicSentence(argumentLabel, makeSkolem(predicate), head), resultSoFar); } else { // NP/N --> ... & p(x) resultForArgument = ConnectiveSentence.make(Connective.AND, new AtomicSentence(predicate, head), resultSoFar); } } else if (argument.equals(Category.PR) || argument.equals(Category.NPthr) || argument.equals(Category.NPexpl)) { // Semantically vacuous arguments resultForArgument = resultSoFar; } else if (Category.S.matches(argument)) { // N/S --> ... & p(ev) final Variable ev = new Variable(SemanticType.Ev); resultForArgument = ConnectiveSentence.make( Connective.AND, new QuantifierSentence(Quantifier.EXISTS, ev, ConnectiveSentence.make(Connective.AND, new AtomicSentence(predicate, ev), new AtomicSentence(argumentLabel, ev, head))), resultSoFar); } else { throw new IllegalStateException(); } return resultForArgument; } private boolean isWhQuestion(final Category category) { return category.isFunctionInto(Category.valueOf("S[wq]")) && !category.isFunctionInto(Category.valueOf("S[dcl]")) // Hacky way of avoiding matches to S[X] && !category.isFunctionIntoModifier(); } private SkolemTerm makeSkolem(final Logic predicate) { final Variable x = new Variable(SemanticType.E); return new SkolemTerm(new LambdaExpression(new AtomicSentence(predicate, x), x)); } /* * (non-Javadoc) * * @see * edu.uw.easysrl.semantics.lexicon.AbstractLexicon#isMultiWordExpression(edu.uw.easysrl.syntax.grammar.SyntaxTreeNode * ) */ @Override public boolean isMultiWordExpression(final SyntaxTreeNode node) { if (node.getLeaves().stream().allMatch(x -> x.getPos().startsWith("NNP"))) { // Analyze "Barack Obama" as barack_obama, not #x.barack(x)&obama(x) return true; } else if (node.getCategory() == Category.CONJ && node.getRuleType() != RuleType.LP && node.getRuleType() != RuleType.RP) { // Don't bother trying to do multi-word conjunctions compositionally (e.g. "as well as"). return true; } return false; } /** * Looks for cases like (S_1/(S_1\NP_2))/NP_2 or (NP_1/(N_1/PP_2))\NP_2 where we don't want to make _2 an argument * of _1 twice. */ private boolean isDuplicateArgument(Coindexation indexation) { final Coindexation argument = indexation.getRight(); while (indexation.getLeft() != null) { indexation = indexation.getLeft(); Coindexation otherArgument = indexation.getRight(); if (otherArgument != null && indexation.getLeftMost().getID().equals(otherArgument.getLeftMost().getID())) { while (otherArgument.getLeft() != null) { if (argument.getID().equals(otherArgument.getRight().getID())) { return true; } otherArgument = otherArgument.getLeft(); } } } return false; } }