;;; Copyright (c) 1991--2018 ;;; John Carroll, Ann Copestake, Robert Malouf, Stephan Oepen; ;;; see `LICENSE' for conditions. ;;; modifications for YADU - April 1997 ;;; bug fixes etc 1995 ;;; July 1996 - caching greatest common subtypes ;;; bit-code field for efficient creation of glb types ;;; comment associated with type preserved - 2018 (in-package :lkb) ;;; For each type we need: ;;; ;;; name - a symbol ;;; parents - the immediate supertypes: a list of type names ;;; constraint - a feature structure stored either in a fully expanded form ;;; or just as the feature structure specific to the type ;;; tdfs - the full typed default feature structure constraint ;;; comment - text string associated with the type in the grammar file ;;; ;;; For implementation purposes we also have: ;;; ;;; constraint-mark - cache of copies of constraint for use during unification ;;; appfeats - appropriate features: a set of features which can be derived from ;;; the constraint (top-level-features-of constraint), but cached in ;;; order to type untyped feature structures efficiently ;;; ancestors - all the supertypes, immediate or otherwise ;;; descendants - all the subtypes ;;; local-constraint - the FS derived from the user-specified unifications ;;; atomic-p - true if the type has no appropriate features and none of its subtypes ;;; have any appropriate features ;;; marks - see marks.lsp ;;; daughters - the immediate subtypes: a list of type names ;;; constraint-spec - the unifications as specified by the grammarian ;;; default-spec - the default unifications as specified by the grammarian ;;; inherited-constraint - the FS after inheritance but before type inference - for ;;; debugging, and cleared after expanding all constraints ;;; glbp - true if type is an automatically created GLB type ;;; enumerated-p - false, or a list of enumerated atomic subtypes following OR in ;;; the path syntax, as specified by Copestake et al. 1999 (obsolete) ;;; shrunk-p, visible-p - for type hierarchy display ;;; bit-code - full representation of descendants, used in GLB computation (defstruct ltype constraint (constraint-mark nil) appfeats ancestors descendants local-constraint atomic-p name marks daughters parents tdfs constraint-spec default-spec inherited-constraint glbp enumerated-p shrunk-p visible-p bit-code comment) (defstruct (leaf-type (:include ltype)) (expanded-p nil)) (defmethod common-lisp:print-object ((instance ltype) stream) (if *print-readably* ;; print so object can be read back into lisp (call-next-method) ;; usual case (progn (write-string "# stream)))) #+:sbcl (declaim (sb-ext:freeze-type ltype leaf-type)) (#+:sbcl sb-ext:defglobal #-:sbcl defvar *types* (make-hash-table :test #'eq)) (declaim (type hash-table *types*)) (defparameter *ordered-type-list* nil) (defparameter *ordered-glbtype-list* nil) (defvar *types-changed* nil) (defvar *lexicon-changed* nil) (defvar *type-reload-p* nil) (defun clear-types () (clear-type-cache) ; must be done whenever types table is cleared (disable-type-interactions) (clrhash *types*) (setf *ordered-type-list* nil) (setf *ordered-glbtype-list* nil) ;; (clear-leaf-types *leaf-types*) ; no longer needed here (clear-feature-table) (clear-expanded-lex) (when (and *gc-before-reload* *type-reload-p*) #+:allegro (excl:gc t) #+:sbcl (sb-ext:gc :full t)) (setf *type-reload-p* t)) (defun clear-types-for-patching-constraints nil (clear-type-cache) (clear-feature-table) (clear-expanded-lex)) (defun clear-type-visibility () (maphash #'(lambda (name entry) (declare (ignore name)) (setf (ltype-visible-p entry) nil)) *types*)) (defun collect-type-names () (let ((type-names nil)) (maphash #'(lambda (name entry) (declare (ignore entry)) (push name type-names)) *types*) type-names)) (declaim (inline get-type-entry)) (defun get-type-entry (name) (gethash name *types*)) (defun set-type-entry (name new-entry) (setf (gethash name *types*) new-entry)) (defun remove-type-entry (name) ;; effectively invalidates type, but caller responsible for updating any relevant ;; type caches etc. (remhash name *types*)) (defun is-valid-type (x) (typecase x (null nil) (symbol (get-type-entry x)) (string t))) (defun string-type-p (type-name) ;; AAC 30/12/94 - if no string type then return nil ;; JAC 29-Nov-2024 - changed so that if no string type then fall back to top type (let ((st *string-type*)) (cond ((eq type-name st)) (st ; !!! call find not member to avoid over-zealous SBCL type inference (find (get-type-entry type-name) (the list (retrieve-ancestors st)) :test #'eq)) (t (eq type-name *toptype*))))) (defun constraint-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun constraint-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-constraint type-record) (error "~%~A is not a valid type" type-name)))) (defun tdfs-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-tdfs type-record) (error "~%~A is not a valid type" type-name)))) (defun default-spec-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-default-spec type-record) (error "~%~A is not a valid type" type-name)))) (defun appropriate-features-of (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-appfeats type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-ancestors (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-ancestors type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-descendants (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-descendants type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-parents (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-parents type-record) (error "~%~A is not a valid type" type-name)))) (defun retrieve-daughters (type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-daughters type-record) (error "~%~A is not a valid type" type-name)))) (defun subtype-p (type1 type2) ;; is type1 a strict subtype of type2? ;; robust on invalid type names: if either of the args is not a type, the function ;; returns nil and does not signal an error (cond ((not (symbolp type2)) nil) ((symbolp type1) ;; an alternative using the type unification machinery would be ;; (and (not (eq type1 type2)) (eq (greatest-common-subtype type1 type2) type1)) ;; but that assumes the args are actually types, and in practice can end up ;; polluting the cache with lots of lexical types (let ((t2 (get-type-entry type2))) (and t2 (ltype-descendants t2) ; chance to return immediately (let ((t1 (get-type-entry type1))) (and t1 (member t2 (ltype-ancestors t1) :test #'eq)))))) ((stringp type1) (string-type-p type2)))) (defun subtype-or-equal (type1 type2) ;; is type1 equal to type2 or a subtype of it? (cond ((eq type1 type2)) ((stringp type2) (and (stringp type1) (string= type1 type2))) (t (subtype-p type1 type2)))) (defun atomic-type-p (type-name) (or (stringp type-name) (let ((type-record (get-type-entry type-name))) (if type-record (ltype-atomic-p type-record) (error "~%~A is not a valid type" type-name))))) ;;; Type unification, with the results memoized for pairs of types where neither is ;;; the top type or a string type. (It would be a really bad idea to precompute a full ;;; table of results since in practice only a very small proportion of the possible ;;; pairs of types are encountered). ;;; ;;; The memoization table is akin to a hash table with open addressing and linear ;;; probing. It's keyed by a numeric combination of sxhash values of the two type ;;; names (symbols): key(t1,t2) = (sxhash(t1) XOR sxhash(t2)) MOD tablesize. ;;; XOR is ideal here since t1 /= t2 and we want key(t1,t2)=key(t2,t1). ;;; We use only the lower-order bits of sxhash values - there's no point in ;;; incorporating higher-order bits since all bits in these values should be ;;; well distributed. The two types are (re-)ordered canonically on their sxhash ;;; values so that either order in the call retrieves the same table entry. ;;; ;;; When the load factor becomes too high we could evict infrequently retrieved ;;; entries, or grow the table and rehash all entries; however, it's much simpler ;;; just to clear the table - and in practice this still gives good performance. ;;; We can tolerate a fairly high load factor since frequently accessed entries ;;; are likely to be close to the start of a collision cluster; indeed, empirically ;;; in >98% of accesses the target entry is at the expected location. (eval-when (:compile-toplevel :load-toplevel :execute) (defconstant +type-cache-size+ (expt 2 16))) (#+:sbcl sb-ext:defglobal #-:sbcl defvar *type-cache* (make-array (* +type-cache-size+ 4) :initial-element nil)) (declaim (type (simple-vector #.(* +type-cache-size+ 4)) *type-cache*)) (#+:sbcl sb-ext:defglobal #-:sbcl defvar *type-cache-count* 0) (declaim (type fixnum *type-cache-count*)) (deftype symbol-type-name () '(and symbol (not null))) (defun clear-type-cache nil ;; For consistency this cache must be cleared before (re-)loading a grammar. It's ;; probably best also to clear it after loading a grammar and also before batch ;; parsing, since different pairs of types will be exercised (fill *type-cache* nil) (setq *type-cache-count* 0)) (defun greatest-common-subtype (type1 type2 &aux (toptype *toptype*)) ;; type1 and type2 expected to be LKB type names, i.e. non-NIL symbols or strings (labels ((gcs-neq-symbols (t1 t2) (declare (symbol-type-name t1 t2)) ; guaranteed by caller (let ((h1 (sxhash t1)) (h2 (sxhash t2)) (temp t1)) ; (re-)order t1, t2; impossible for CPU to (when (> h1 h2) (setq t1 t2)) ; predict whether h1 > h2 or not, so encourage (when (> h1 h2) (setq t2 temp)) ; compiler to make operation branchless (let ((key (* (mod (logxor h1 h2) +type-cache-size+) 4)) (tc *type-cache*)) (loop (let ((e (svref tc key))) (cond ((and (eq e t1) (eq (svref tc (+ key 1)) t2)) (return (values (svref tc (+ key 2)) (svref tc (+ key 3))))) ((null e) (return (gcs-add-entry t1 t2 key))))) (setq key (dpb 0 (byte 2 0) ; make it obvious that elements key+1..3 are always in bounds (mod (+ key 4) (* +type-cache-size+ 4)))))))) (gcs-add-entry (t1 t2 key) (multiple-value-bind (subtype constraintp) (full-greatest-common-subtype t1 t2) (if (> *type-cache-count* (truncate (* +type-cache-size+ 5/6))) (clear-type-cache) ; evict everything (and then mustn't cache with this key) (let ((tc *type-cache*)) (setf (svref tc key) t1 (svref tc (+ key 1)) t2 (svref tc (+ key 2)) subtype (svref tc (+ key 3)) constraintp) (incf *type-cache-count*))) (values subtype constraintp))) (invalid-argument () (error "Inconsistency - invalid arguments ~S and ~S to GREATEST-COMMON-SUBTYPE" type1 type2))) (declare (notinline gcs-neq-symbols gcs-add-entry invalid-argument)) (cond ((or (eq type1 type2) (eq type2 toptype)) type1) ((eq type1 toptype) type2) ((typep type1 'symbol-type-name) (typecase type2 (symbol-type-name (gcs-neq-symbols type1 type2)) (string (if (string-type-p type1) type2 nil)) (t (invalid-argument)))) ((typep type2 'symbol-type-name) (typecase type1 (string (if (string-type-p type2) type1 nil)) (t (invalid-argument)))) ((and (stringp type1) (stringp type2)) (if (string= type1 type2) type1 nil)) (t (invalid-argument))))) #| ;;; investigate effectiveness of greatest common subtype cache: occupancy, collisions (float (/ (loop for i from 0 below (* +type-cache-size+ 4) by 4 count (svref *type-cache* i)) +type-cache-size+)) (loop for i from 0 below (* +type-cache-size+ 4) by 4 do (format t "~A" (if (svref *type-cache* i) 1 "-"))) (loop with stats = nil for i from 0 below (* +type-cache-size+ 4) by 4 when (svref *type-cache* i) do (let* ((t1 (svref *type-cache* i)) (t2 (svref *type-cache* (+ i 1))) (h1 (sxhash t1)) (h2 (sxhash t2)) (key (* (mod (logxor h1 h2) +type-cache-size+) 4)) (distance (/ (if (>= i key) (- i key) (+ i (- (* +type-cache-size+ 4) key))) 4))) ; (when (> distance 50) (print (list distance i key t1 t2))) (let ((x (assoc distance stats))) (if x (incf (cdr x)) (push (cons distance 1) stats)))) finally (return (sort stats #'< :key #'car))) (clear-type-cache) |# (defun full-greatest-common-subtype (type1 type2) (flet ((intersection-eq (set1 set2) (and set1 set2 (let ((set1-len (length set1)) (set2-len (length set2))) (when (> set2-len set1-len) (rotatef set1 set2) ; make set1 be the larger one (rotatef set1-len set2-len)) (if (> set2-len 32) ; trade space for time if both sets are non-trivial (let ((table (make-hash-table :test #'eq :size set2-len)) ; the smaller one (res nil)) (dolist (e2 set2) (setf (gethash e2 table) t)) (dolist (e1 set1 res) (when (gethash e1 table) (push e1 res)))) (loop for e2 in set2 ; the smaller one when (member e2 set1 :test #'eq) collect e2)))))) (let ((t1 (get-type-entry type1)) (t2 (get-type-entry type2))) (cond ((eq type1 type2) type1) ((member t2 (ltype-ancestors t1) :test #'eq) type1) ((member t1 (ltype-ancestors t2) :test #'eq) type2) (t (let ((common-subtypes (intersection-eq (ltype-descendants t1) (ltype-descendants t2)))) (when common-subtypes (let ((gcsubtypes ;; find subtype whose descendant list has just 1 element fewer (itself) ;; than full set of common subtypes, and check there's exactly one of ;; them (NB the code below assumes that cases where the gcs could be ;; type1 or type2 itself have already been covered) (loop with card-1 = (1- (length common-subtypes)) for ct in common-subtypes when (= (length (ltype-descendants ct)) card-1) collect ct))) (cond ((null gcsubtypes) (error "Type hierarchy inconsistent: ~A and ~A have common subtypes but descendant lists are contradictory" type1 type2)) ((cdr gcsubtypes) (error "Type hierarchy inconsistent: ~A and ~A have common subtypes but no unique greatest common subtype" type1 type2)) (t ;; return t as the second value if there is a constraint that may ;; have to be unified in (values (ltype-name (car gcsubtypes)) (extra-constraint-p (car gcsubtypes) t1 t2)))))))))))) (defun extra-constraint-p (gcsubtype t1 t2) ;; test whether any ancestor of the gcsubtype which isn't also an ancestor of the ;; types being unified or the gcsubtype itself introduces any extra information ;; on the constraint; return t or nil (or (and (ltype-local-constraint gcsubtype) t) (let ((t1ancs (ltype-ancestors t1)) (t2ancs (ltype-ancestors t2))) (dolist (type (ltype-ancestors gcsubtype)) (when (and (not (eq type t1)) (not (eq type t2)) (ltype-local-constraint type) (not (member type t1ancs :test #'eq)) (not (member type t2ancs :test #'eq))) (return t)))))) ;;; Type generalisation, used in FS generalisation and ambiguity packing under least common ;;; supertype generalisation. No memoisation, since invoked much less frequently than the ;;; corresponding type unification operation, and usually cheaper since in a typical ;;; grammar most types have far fewer ancestors than descendants. (defun least-common-supertype (x y &aux (st (or *string-type* *toptype*))) ;; arguments x and y expected to be LKB type names, i.e. non-NIL symbols or strings ;; JAC 29-Nov-2024 - if *string-type* set to nil then fall back to top type (flet ((lcs-neq-symbols (x y) (let ((xt (get-type-entry x)) (yt (get-type-entry y))) (cond ((member yt (ltype-ancestors xt) :test #'eq) y) ((member xt (ltype-ancestors yt) :test #'eq) x) (t ;; the least common supertype is the type that has exactly 1 fewer ancestors ;; than the cardinality of the full set of common supertypes, i.e. the lcs and ;; its ancestors completely cover the set (NB the code below assumes that ;; cases where the lcs could be x or y itself have already been covered) (loop with common-supertypes = (loop with axts = (ltype-ancestors xt) for ayt in (ltype-ancestors yt) when (member ayt axts :test #'eq) collect ayt) with card-1 = (1- (length common-supertypes)) for ct in common-supertypes when (= (length (ltype-ancestors ct)) card-1) return (ltype-name ct) finally (error "Type hierarchy inconsistent: ~A and ~A apparently have no least common supertype" x y)))))) (invalid-argument () (error "Inconsistency - invalid arguments ~S and ~S to LEAST-COMMON-SUPERTYPE" x y))) (declare (notinline lcs-neq-symbols invalid-argument)) (cond ((eq x y) (or x (invalid-argument))) ((typep x 'symbol-type-name) (typecase y (symbol-type-name (lcs-neq-symbols x y)) (string (least-common-supertype x st)) (t (invalid-argument)))) ((typep y 'symbol-type-name) (typecase x (string (least-common-supertype st y)) (t (invalid-argument)))) ((and (stringp x) (stringp y)) (if (string= x y) x st)) (t (invalid-argument))))) ;;; The following utility functions assume that no cycles are present (defun get-real-types (type) (let ((type-entry (get-type-entry type))) (if (ltype-glbp type-entry) (loop for parent in (ltype-parents type-entry) append (get-real-types parent)) (list type)))) ;;; We need a record of the maximal type at which a particular feature ;;; is introduced (defvar *feature-list* (make-hash-table :test #'eq)) (defvar *feature-minimal-type* (make-hash-table :test #'eq)) (defun clear-feature-table nil (clrhash *feature-minimal-type*) (clrhash *feature-list*)) (defun maximal-type-of (feature) (gethash feature *feature-list*)) (defun set-feature-entry (feature type) (setf (gethash feature *feature-list*) type)) (defun check-feature-table nil (let ((ok t)) (maphash #'(lambda (feature type-list) (cond ((> (length type-list) 1) (format t "~%Feature ~A is introduced at multiple types ~A" feature type-list) (setf ok nil)) (t (set-feature-entry feature (car type-list))))) *feature-list*) ok)) (defun maximal-type-of-list (features) (loop for f in features for mt = (maximal-type-of f) then (greatest-common-subtype (maximal-type-of f) mt) while mt finally (return mt))) (defun maximal-type-of-list* (&rest features) (declare (dynamic-extent features)) (maximal-type-of-list features)) ;; Remove obsolete pointers from type constraints so that the garbage ;; collector can purge the structures they point to. (defun gc-types nil (maphash #'(lambda (name type) (declare (ignore name)) (when (ltype-tdfs type) (compress-dag (tdfs-indef (ltype-tdfs type)))) (compress-dag (ltype-constraint type)) (setf (ltype-constraint-mark type) nil) (compress-dag (ltype-local-constraint type))) *types*)) ;;; Try to reduce the amount of space used by the expanded type hierarchy (defun clear-glbs nil (gc-types) (maphash #'(lambda (name type) (when (eql (mismatch "GLBTYPE" (symbol-name name)) 7) (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*)) (defun used-types (type) (let ((used (mapcar #'(lambda (x) (u-value-type (unification-rhs x))) (ltype-constraint-spec type)))) (when used (remove-duplicates used)))) (defun purge-constraints nil (gc-types) (let* ((leaves (mapcar #'(lambda (x) (gethash x *types*)) (slot-value *leaf-types* 'leaf-types))) (parents (reduce #'union (mapcar #'ltype-parents leaves))) (referred (reduce #'union (mapcar #'used-types leaves))) (save (union parents referred))) (maphash #'(lambda (name type) (unless (member (symbol-name name) save) ;; (setf (ltype-constraint type) nil) (setf (ltype-tdfs type) nil))) *types*))) (defun types-to-xml (&key (stream t) file) (loop with stream = (if file (open file :direction :output :if-exists :supersede :if-does-not-exist :create) stream) for type being each hash-value in *types* for name = (ltype-name type) for parents = (ltype-parents type) for daughters = (ltype-daughters type) do (format stream "~% ~%" name) (loop for parent in parents do (format stream " ~%" parent)) (format stream " ~% ~%") (loop for daughter in daughters do (format stream " ~%" daughter)) (format stream " ~%~%") finally (when file (close stream))))