;;; Hey, emacs(1), this is -*- Mode: Common-Lisp; -*-, got it? ;;; ;;; PAL --- PCFG Approximation and Parsing Library for DELPH-IN ;;; ;;; Copyright (c) 2009 -- 2012 Johan Benum Evensberget (johan.benum@gmail.com) ;;; Copyright (c) 2009 -- 2012 Stephan Oepen (oe@ifi.uio.no) ;;; ;;; This program is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU Lesser General Public License as published by ;;; the Free Software Foundation; either version 2.1 of the License, or (at ;;; your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, but WITHOUT ;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ;;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public ;;; License for more details. ;;; (in-package :pcfg) (defstruct agenda (size 0) data) (defun agenda-enqueue (agenda score item) (if (agenda-data agenda) (if (>= score (first (first (agenda-data agenda)))) (setf (agenda-data agenda) (cons (cons score item) (agenda-data agenda))) (loop for data on (agenda-data agenda) for next = (first (rest data)) when (or (null next) (>= score (first next))) do (setf (rest data) (cons (cons score item) (rest data))) and return nil)) (setf (agenda-data agenda) (list (cons score item)))) (incf (agenda-size agenda))) (defun agenda-empty-p (agenda) (= (agenda-size agenda) 0)) (defun agenda-dequeue (agenda) (when (agenda-data agenda) (decf (agenda-size agenda)) (rest (pop (agenda-data agenda))))) (defun agenda-if (test agenda &key key) (loop for entry in (agenda-data agenda) when (funcall test (if key (funcall key (rest entry)) (rest entry))) return entry)) (defstruct unpacking decompositions hypotheses instantiations (agenda (make-agenda))) (defmethod print-object ((object unpacking) stream) (format stream "#[U <~{~a~^ ~}>]" (unpacking-decompositions object))) (defstruct decomposition lhs rhs done) (defmethod print-object ((object decomposition) stream) (format stream "#[D ~(~a~) < ~(~{~a ~^~}~)>]" (decomposition-lhs object) (decomposition-rhs object))) (defmacro decomposition-record-indices (decomposition indices) `(push ,indices (decomposition-done ,decomposition))) (defmacro decomposition-indices-done-p (decomposition indices) `(member ,indices (decomposition-done ,decomposition) :test #'equal)) (defun indices<= (indices1 indices2) (loop for index1 in indices1 for index2 in indices2 always (<= index1 index2))) (let ((hypotheses 0)) (defun unpack-initialize () (setf hypotheses 0)) (defun new-hypothesis-id () (let ((n hypotheses)) (incf hypotheses) n))) (defstruct hypothesis (id (new-hypothesis-id)) score decomposition indices parents daughters edge) (defmethod print-object ((object hypothesis) stream) (format stream "#[H [~a] ~a~@[ ~a~]]" (hypothesis-id object) (hypothesis-indices object) (hypothesis-edge object))) (defun new-hypothesis (decomposition indices daughters) (let ((new (make-hypothesis :decomposition decomposition :indices indices :daughters daughters))) (loop for daughter in daughters do (push new (hypothesis-parents daughter))) new)) (defun selectively-unpack-edges (edges &optional (n 1)) #+:debug (setf %edges edges) (let ((representative (first edges))) (hypothesize-edge representative 0 :top (or (rest edges) t)) (or (loop for i from 0 for hypothesis = (hypothesize-edge representative i) for new = (when hypothesis (instantiate-hypothesis hypothesis)) while (and hypothesis (>= n 1)) when new do (decf n) and collect new)))) (defun hypothesize-edge (edge i &key top agenda) ;; ;; returns expected score for .i.-th instantiation of this .edge., where some ;; of these might turn out inconsistent later. whenever we are called with a ;; new (aka previously unseen) value for .i., we assume it is the immediately ;; following index from the previous call, i.e. we will search for the next ;; best hypothesis. ;; (when (null (edge-unpacking edge)) (unless (= i 0) (error "hypothesize-edge(): first time call with i == ~a" i)) (let* ((unpacking (make-unpacking)) (agenda (or agenda (unpacking-agenda unpacking)))) (setf (edge-unpacking edge) unpacking) (decompose-edge edge) (loop for decomposition in (unpacking-decompositions unpacking) for n = 0 for daughters = (loop for edge in (decomposition-rhs decomposition) do (incf n) collect (hypothesize-edge edge 0)) for indices = (make-list n :initial-element 0) for hypothesis = (new-hypothesis decomposition indices daughters) for score = (score-hypothesis hypothesis) do (decomposition-record-indices decomposition indices) (agenda-enqueue agenda score hypothesis)) ;; ;; for the special case that we are working on `top' edges, i.e. those in ;; the list of edges returned by the parser. for these, we need to make ;; sure that decompositions corresponding to top-level packings of these ;; edges are hypothesized into the agenda of the host edge, and that we ;; invoke the same procedure on local alternates i.e. other arguments to ;; selectively-unpack-edges() that happen to not be the one ;; representative edge. ;; (when top (loop for edge in (edge-alternates edge) do (hypothesize-edge edge 0 :agenda agenda)) (when (consp top) (loop for edge in top do (hypothesize-edge edge 0 :agenda agenda :top t)))))) (let* ((unpacking (edge-unpacking edge)) (agenda (unpacking-agenda unpacking)) (hypothesis (when unpacking (nth i (unpacking-hypotheses unpacking))))) (if hypothesis ;; ;; in case we have hypothesized this decomposition before, just reuse it; ;; hypothesis ;; ;; otherwise, retrieve the current best candidate, try generating new ;; hypotheses from `vertical' search, i.e. advancing either one of the ;; daughter indices on the current best, put those on the agenda, and ;; return the one just retrieved. ;; (unless (agenda-empty-p agenda) (let* ((hypothesis (agenda-dequeue agenda)) (indiceses (loop for foo on (hypothesis-indices hypothesis) collect (append prefix (cons (+ (first foo) 1) (rest foo))) collect (first foo) into prefix))) (loop with decomposition = (hypothesis-decomposition hypothesis) for indices in indiceses for daughters = (unless (or (decomposition-indices-done-p decomposition indices) (agenda-if #'(lambda (foo) (and (eq (hypothesis-decomposition foo) decomposition) (indices<= (hypothesis-indices foo) indices))) agenda)) (loop for edge in (decomposition-rhs decomposition) for i in indices for daughter = (hypothesize-edge edge i) when (null daughter) return nil collect daughter)) for new = (when daughters (new-hypothesis decomposition indices daughters)) when new do (decomposition-record-indices decomposition indices) (agenda-enqueue agenda (score-hypothesis new) new)) (setf (unpacking-hypotheses unpacking) (nconc (unpacking-hypotheses unpacking) (list hypothesis))) hypothesis))))) (defun decompose-edge (edge) ;; ;; entirely called for its side effect: populate `decomposition' set in the ;; `unpacking' record of .edge. ;; (with-slots (unpacking children) edge (if (null children) (push (make-decomposition :lhs edge) (unpacking-decompositions unpacking)) (loop for child in children collect (cons child (edge-alternates child)) into foo finally (loop for rhs in (cross-product foo) for decomposition = (make-decomposition :lhs edge :rhs rhs) do (push decomposition (unpacking-decompositions unpacking))))))) (defun score-hypothesis (hypothesis) (setf (hypothesis-score hypothesis) (+ (loop for daughter in (hypothesis-daughters hypothesis) for score = (or (hypothesis-score daughter) (score-hypothesis daughter)) sum score) (let* ((decomposition (hypothesis-decomposition hypothesis)) (sponsor (edge-sponsor (decomposition-lhs decomposition)))) (typecase sponsor (rule (rule-log-prob sponsor)) (lexeme (lexeme-log-prob sponsor)) ;; ;; _fix_me_ ;; in the future (i.e. when parsing from a lexical lattice where only ;; the full lexical items have been annotated), leaf nodes can be PCFG ;; edges that correspond to full lexical items, which should then have ;; the original edge from the input lexical lattice as their sponsor. ;; in one were to look for a probability distribution over these, one ;; should turn to something ubertagging-like. (26-jun-12; johan & oe) ;; (lkb::edge 0)))))) (defun instantiate-hypothesis (hypothesis) hypothesis #+:null (let ((cache (hypothesis-edge hypothesis))) (cond (cache (unless (eq cache :fail) cache)) ((null (hypothesis-daughters hypothesis)) (let* ((decomposition (hypothesis-decomposition hypothesis)) (edge (decomposition-lhs decomposition))) ;; _fix_me_ (when (edge-odag edge) (setf (edge-dag edge) (edge-odag edge))) (setf (edge-score edge) (hypothesis-score hypothesis)) (setf (hypothesis-edge hypothesis) edge))) (t (setf (hypothesis-edge hypothesis) (let* ((children (loop for daughter in (hypothesis-daughters hypothesis) for child = (instantiate-hypothesis daughter) when (null child) return nil collect child))) (if children (with-unification-context (ignore) (loop with score = (hypothesis-score hypothesis) with decomposition = (hypothesis-decomposition hypothesis) with edge = (decomposition-lhs decomposition) with rule = (edge-rule edge) ;; _fix_me_ with paths = (rest (rule-order rule)) with result = (rule-full-fs rule) for path in paths for child in children for tdfs = (edge-dag child) while result do (setf result (yadu! result tdfs path)) finally (when result (setf result (restrict-and-copy-tdfs result))) (return (if result (make-edge :id (new-edge-id) :score score :rule rule :dag result :from (edge-from edge) :to (edge-to edge) :children children) :fail)))) :fail))) (let ((result (hypothesis-edge hypothesis))) (unless (eq result :fail) result)))))) (defun hypothesis-to-tree (hyp) (edge-to-tree (decomposition-lhs (hypothesis-decomposition hyp)))) (defun uncollapsep (tree) (and (numberp (gethash (first tree) *TT-table*)) ;; word or collapsed node (atom (second tree)))) ;; second node is a word (defun uncollapse (tree &key surface) (let* ((root (car tree)) (word (second tree)) (symbol (code-to-symbol root)) (nodes (cl-ppcre:split "/" symbol))) (labels ((build-tree (nodes) (if nodes (cons (intern (car nodes) :lkb) (list (build-tree (rest nodes)))) (if surface (code-to-symbol word) word)))) (build-tree nodes)))) (defun normalize-tree (tree &key surface) (if (atom tree) (if surface (code-to-symbol tree) tree) (if (uncollapsep tree) (uncollapse tree :surface surface) (cons (car tree) (mapcar (lambda (x) (normalize-tree x :surface surface)) (rest tree)))))) (defun tt-reduce (tree) (maptree (lambda (x) (gethash x *tt-table*)) tree))