;;; -*- Mode: tdl; Coding: utf-8; indent-tabs-mode: nil; -*-

;;; Built-in tagging in PET is controlled first of all by the `taggers' setting,
;;; which determines which taggers are run and in which order. The tagger names
;;; are arbitrary strings; each name is used as the prefix of the settings
;;; (e.g. `tnt-command') that control how that tagger is run, what input it
;;; requires, and how its output is processed. The relevant settings are:
;;;  * -command: REQ, command line for tagger
;;;  * -tok_sep: REQ, string that separates tokens
;;;  * -utt_sep: REQ, string that separates items
;;;  * -tag_format: REQ, specifies the output of the tagger, one of:
;;;     * "single" - one tag per token, format: word tag
;;;     * "multi" - multiple tags per token, format: word (tag prob)+
;;;     * "genia" - genia output, format: word lemma POS chunk NE
;;;  * -arguments: OPT, command line arguments for tagger
;;;  * -mapping: OPT, words that should be mapped before input to tagger
;;;  * -utterance-start: OPT, token that should be given to tagger
;;;      before each item (for taggers that assume continuous input)
;;;  * -utterance-end: OPT, token that should be given to tagger
;;;      after each item (for taggers that assume continuous input)
;;;  * -pos_sep: OPT, string to separate token and POS in tagger input.
;;;      A non-empty pos_sep indicates that the tagger expects token and POS
;;;      as input.
;;;  * -namedentities: OPT, controls whether named entities are added as
;;;      extra tokens. Ignored if tag_format != "genia".

;;; Active tagger list: only `tnt' is run. The commented-out alternative
;;; would run `tnt' followed by `stnt'.
taggers := tnt.
;taggers := tnt stnt.

;;; tnt: the tnt binary from the LOGON tree, run with the `wsj' model.
;;; The trailing `-' in the arguments presumably makes tnt read its input
;;; from standard input -- confirm against the TnT manual.
tnt-command := "${LOGONROOT}/bin/tnt".
tnt-arguments := "-v0 -z100 ${LOGONROOT}/coli/tnt/models/wsj -".
;;; NOTE(review): tok_sep and utt_sep read as the same one-character
;;; whitespace string here; verify that these are the token and item
;;; separators the tagger actually expects.
tnt-tok_sep := " ".
tnt-utt_sep := " ".
tnt-tag_format := "multi".
;;; A `.' token is given to the tagger before and after each item.
tnt-utterance-start := ".".
tnt-utterance-end := ".".
;;; Typographic quotes, ellipsis and dashes alongside ASCII sequences; the
;;; list reads as alternating (from, to) pairs -- TODO confirm the pairing.
tnt-mapping := "“" "``" "”" "''" "‘" "`" "’" "'" "…" "..." "—" "---" "–" "--".

;;; stnt: a second configuration of the same tnt binary with a different
;;; model. It is not in the active `taggers' list above. Separators, tag
;;; format, utterance markers and mapping are the same as for `tnt'.
stnt-command := "${LOGONROOT}/bin/tnt".
;;; NOTE(review): the model path is rooted at ${HOME} rather than
;;; ${LOGONROOT}, so it depends on the invoking user's home directory
;;; (presumably expanded from the environment -- confirm).
;;; The commented-out line selects the `mapped_models' variant instead of
;;; the `morph_models' one.
;stnt-arguments := "-v0 -z100 ${HOME}/work/supertagging/mapped_models/not-wsj02_model -".
stnt-arguments := "-v0 -z100 ${HOME}/work/supertagging/morph_models/not-wsj02_morphmodel -".
;;; NOTE(review): same whitespace-separator caveat as for tnt above.
stnt-tok_sep := " ".
stnt-utt_sep := " ".
stnt-tag_format := "multi".
stnt-utterance-start := ".".
stnt-utterance-end := ".".
stnt-mapping := "“" "``" "”" "''" "‘" "`" "’" "'" "…" "..." "—" "---" "–" "--".

;;; genia: geniatagger from the LOGON tree, using the "genia" output format
;;; (word lemma POS chunk NE). It is not in the active `taggers' list above.
genia-command := "${LOGONROOT}/bin/geniatagger".
genia-arguments := "-nt".
;;; NOTE(review): tok_sep and utt_sep are identical here as well; confirm
;;; that the item separator is what geniatagger expects between inputs.
genia-tok_sep := " ".
genia-utt_sep := " ".
genia-tag_format := "genia".
genia-mapping := "“" "``" "”" "''" "‘" "`" "’" "'" "…" "..." "—" "---" "–" "--".
;;; Named entities are not added as extra tokens.
genia-namedentities := no.