module TextlabOBTStat
# @abstract Subclass and override {#write}, {#write_postamble} and potentially
# {#write_sentence_header}/{#write_sentence_footer} to implement an output
# formatting class that can be passed to Disambiguator.
class Writer
  # Create a writer.
  #
  # @param [Hash] opts output options
  # @option opts [IO, StringIO] :file IO instance to which output is written; defaults to $stdout.
  # @option opts [Symbol] :xml Include sentence segmentation tags in output; any truthy value enables them.
  def initialize(opts = {})
    @file = opts[:file] || $stdout
    # A false/missing option is normalized to nil.
    @xml = opts[:xml] || nil
  end

  ##
  # Format and output the given word, its annotation and preamble text if needed.
  #
  # @param [Word] word
  # @raise [NotImplementedError] always; subclasses must override this method.
  def write(word)
    raise NotImplementedError, "#{self.class.name}#write is not implemented"
  end

  ##
  # Format and output postamble text after annotated text if needed.
  #
  # @param [Text] text
  # @raise [NotImplementedError] always; subclasses must override this method.
  # @todo Doesn't need to use the word for current supported formats. Some formats (treetagger) do, but then
  #   Disambiguator needs to call the Writer interface differently eg. in Disambiguator.disambiguate_word.
  def write_postamble(text)
    raise NotImplementedError, "#{self.class.name}#write_postamble is not implemented"
  end

  # Write sentence start marker based on the passed Sentence instance.
  # The default is to output the XML sentence start tag only when the :xml option was given.
  #
  # @param [Sentence] sentence Current sentence.
  def write_sentence_header(sentence)
    @file.puts(xml_start_tag(sentence)) if @xml
  end

  # Build the XML sentence start tag, carrying over the sentence's attributes
  # as name="value" pairs. Attribute values are inserted verbatim (no escaping).
  #
  # @private
  # @param [Sentence] sentence object responding to #attrs (nil or a Hash-like object)
  # @return [String] "<s>" when there are no attributes, otherwise "<s name=\"value\" ...>"
  def xml_start_tag(sentence)
    attrs = sentence.attrs
    return '<s>' unless attrs

    attr_str = attrs.keys.map { |name| "#{name}=\"#{attrs[name]}\"" }.join(' ')
    # An empty attribute set must not leave a dangling space inside the tag.
    attr_str.empty? ? '<s>' : "<s #{attr_str}>"
  end

  # XML sentence end tag matching {#xml_start_tag}.
  #
  # @private
  # @return [String]
  def xml_end_tag
    '</s>'
  end

  # Write sentence end marker based on the passed Sentence instance.
  # The default is to output the XML sentence end tag only when the :xml option was given.
  #
  # @param [Sentence] sentence Current sentence.
  def write_sentence_footer(sentence)
    @file.puts(xml_end_tag) if @xml
  end
end

# Echo the input as closely as possible.
class InputWriter < Writer
  # Write the word's preamble lines (if any), followed by the word's input
  # string and the input string of its selected tag, one per line.
  #
  # @param [Word] word
  def write(word)
    # The selected tag is looked up before anything is written.
    tag = word.get_selected_tag
    word.preamble.each { |line| @file.puts(line) } if word.preamble
    @file.puts(word.input_string)
    @file.puts(tag.input_string)
  end

  # Write the text's postamble.
  #
  # @param [Text] text
  def write_postamble(text)
    @file.puts(text.postamble)
  end

  # Write the sentence start tag when :xml was requested (handled by super),
  # and also when it was not requested but the sentence carries attributes.
  #
  # @param [Sentence] sentence Current sentence.
  # @todo tag may precede preamble even if this differs from input
  def write_sentence_header(sentence)
    super
    # include XML sentence tags even if they're not asked for
    @file.puts(xml_start_tag(sentence)) if !@xml && sentence.attrs
  end

  # Write the sentence end tag under the same conditions as the start tag.
  #
  # @param [Sentence] sentence Current sentence.
  def write_sentence_footer(sentence)
    super
    # include XML sentence tags even if they're not asked for
    @file.puts(xml_end_tag) if !@xml && sentence.attrs
  end
end
# Tabular format with token, lemma and tag, tab separated.
class VRTWriter < Writer
  # Emit one line for the word: its output string, then the selected tag's
  # lemma and cleaned output tag, separated by tab characters.
  #
  # @param [Word] word
  def write(word)
    selected = word.get_selected_tag
    token = word.output_string
    lemma = selected.lemma
    clean_tag = selected.clean_out_tag
    @file.puts(format("%s\t%s\t%s", token, lemma, clean_tag))
  end

  # VRT output has no postamble, so this is intentionally a no-op.
  #
  # @param [Text] text ignored
  #noinspection RubyUnusedLocalVariable
  def write_postamble(text)
  end

  # Sentence start handling is delegated unchanged to the superclass.
  #
  # @param [Sentence] sentence Current sentence.
  def write_sentence_header(sentence)
    super
  end

  # Delegate to the superclass, then terminate the sentence with an empty
  # line when XML sentence delimiters were not requested.
  #
  # @param [Sentence] sentence Current sentence.
  def write_sentence_footer(sentence)
    super
    return if @xml

    @file.puts
  end
end
# @deprecated
class MarkWriter < InputWriter
  # Write the word's preamble lines (if any) and its input string, then one
  # line per tag in word.tags with a marker appended to the selected tag.
  #
  # NOTE(review): both markers below are a single space; this looks like marker
  # text may have been lost at some point - confirm the intended strings.
  #
  # @param [Word] word
  def write(word)
    preamble = word.preamble
    preamble.each { |line| @file.puts(line) } if preamble
    @file.puts(word.input_string)
    word.tags.each { |candidate| write_tag_line(candidate) }
  end

  private

  # Write a single tag line: the right-stripped tag input string, one marker if
  # the tag is selected, a second marker if it is selected but not correct,
  # and finally a newline.
  def write_tag_line(candidate)
    @file.write(candidate.input_string.rstrip)
    if candidate.selected
      @file.write(' ')
      @file.write(' ') unless candidate.correct
    end
    @file.puts
  end
end
end