// Reader for the textual MRS format handled by tMRS, plus extraction,
// printing and comparison of the NAME / ARG / PROP triples derived from it.
//
// NOTE(review): the template arguments and system header names in this file
// were reconstructed from how the values are used; confirm pair<string,string>
// (parseName), map<string,int> and vector<Unmatched> (compareTriples) against
// the declarations in mrsReader.h.
#include "mrsReader.h"

#include <cctype>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace std;

namespace {

// First character of s, or '\0' when s is empty, so that look-ahead tests
// never index past the end of a truncated input.
inline char frontChar(const string &s) {
  return s.empty() ? '\0' : s[0];
}

// The <cctype> classifiers take an int that must be representable as
// unsigned char; a plain char holding a non-ASCII byte may be negative.
inline bool isSpaceChar(char c) {
  return isspace(static_cast<unsigned char>(c)) != 0;
}

inline bool isAlphaChar(char c) {
  return isalpha(static_cast<unsigned char>(c)) != 0;
}

inline bool isDigitChar(char c) {
  return isdigit(static_cast<unsigned char>(c)) != 0;
}

inline bool isGraphChar(char c) {
  return isgraph(static_cast<unsigned char>(c)) != 0;
}

// Removes all leading whitespace from s.
void skipSpaces(string &s) {
  size_t n = 0;
  while (n < s.size() && isSpaceChar(s[n]))
    ++n;
  s.erase(0, n);
}

// Display label used in Unmatched records: the rel name, followed by
// "(carg)" when the rel has a non-empty CARG.
string relLabel(const Rel *rel) {
  if (rel->carg != "")
    return rel->name + "(" + rel->carg + ")";
  return rel->name;
}

// Writes name, span and (when non-empty) "(carg)" of a rel, in that order.
void printRel(const Rel *rel) {
  cout << rel->name << rel->span;
  if (rel->carg != "")
    cout << "(" << rel->carg << ")";
}

} // namespace

// Parses an MRS of the form
//   [ LTOP: var INDEX: var [props]? RELS: < rel* > HCONS: < (var qeq var)* > ]
// from input and then builds the NAME / ARG / PROP triples and their counts.
// Any syntax error is reported on cerr and terminates the process (exit(1)).
// db switches on parse tracing to cerr.
tMRS::tMRS(string input, bool db) {
  debug = db;
  string rest = input;

  // parse MRS
  parseChar('[', rest);
  parseID("LTOP:", rest);
  ltop = parseVar(rest);
  parseID("INDEX:", rest);
  index = parseVar(rest);
  if (frontChar(rest) == '[')
    arg0s[index] = parseProps(rest);
  parseID("RELS:", rest);
  parseChar('<', rest);
  while (frontChar(rest) == '[')
    parseRel(rest);
  parseChar('>', rest);
  parseID("HCONS:", rest);
  parseChar('<', rest);
  while (parseHCONS(rest))
    ;
  parseChar('>', rest);
  parseChar(']', rest);
  if (!rest.empty())
    cerr << "ignoring trailing data: \"" << rest << "\"" << endl;

  // find and count all valid triples
  counts["ALL"] = 0;
  counts["NAMES"] = 0;
  counts["ARGS"] = 0;
  counts["PROPS"] = 0;
  counts["ARG0"] = 0;

  ATriple at;
  PTriple pt;
  for (size_t i = 0; i < rels.size(); ++i) {
    Rel *rel = rels[i];
    const int relno = static_cast<int>(i);

    // each rel has a NAME triple
    counts["ALL"]++;
    counts["NAMES"]++;
    spanToRel[rel->span].push_back(relno);

    if (rel->type == QUANT) {
      // add quantifier relations as ARG0 triples; skip unbound quantifiers
      if (arg0ToRel.count(rel->arg0)) {
        at.first = relno;
        at.second.first = string("ARG0");
        at.second.second = arg0ToRel[rel->arg0];
        argtriples.push_back(at);
        counts["ARG0"]++;
        counts["ARGS"]++;
        counts["ALL"]++;
        // index all ARG triples by span
        spanToATriple[rel->span].push_back(argtriples.size() - 1);
      }
      continue; // quantifiers contribute no role or property triples
    }

    // other ARG triples: resolve each role variable to a target rel
    for (Roles::iterator r = rel->roles->begin(); r != rel->roles->end();
         ++r) {
      const string &role = r->first;
      const Var &var = r->second;
      int target = 0;

      if (arg0s.count(var) && arg0ToRel.count(var)) {
        // the role variable is a normal ARG0 var of a rel
        target = arg0ToRel[var];
      } else if (lblToArg0.count(var)) {
        // role variable is the label of a rel
        // NOTE(review): unlike the qeq case below, this does not check that
        // arg0ToRel has an entry for the label's ARG0; operator[] then
        // inserts one with a value-initialised index. Behaviour preserved.
        target = arg0ToRel[lblToArg0[var]];
      } else if (hcons.count(var) && lblToArg0.count(hcons[var]) &&
                 arg0ToRel.count(lblToArg0[hcons[var]])) {
        // role variable is qeq with a label of a rel
        target = arg0ToRel[lblToArg0[hcons[var]]];
      } else {
        // role variable doesn't link to anything, so we skip this triple
        continue;
      }

      at.first = relno;
      at.second.first = role;
      at.second.second = target;
      argtriples.push_back(at);
      counts[role]++; // operator[] value-initialises a missing counter
      counts["ARGS"]++;
      counts["ALL"]++;
      spanToATriple[rel->span].push_back(argtriples.size() - 1);
    }

    // count feature triples of the rel's ARG0, if it has any properties
    if (arg0s.count(rel->arg0) > 0 && arg0s[rel->arg0] != NULL) {
      Props *props = arg0s[rel->arg0];
      for (Props::iterator p = props->begin(); p != props->end(); ++p) {
        pt.first = relno;
        pt.second.first = p->first;
        pt.second.second = p->second;
        proptriples.push_back(pt);
        counts[p->first]++;
        counts["PROPS"]++;
        counts["ALL"]++;
        spanToPTriple[rel->span].push_back(proptriples.size() - 1);
      }
    }
  }
}

// NOTE(review): the Rel, Roles and Props objects allocated with new while
// parsing are not released here. Left unchanged because it is not visible
// from this file whether tMRS objects are copied.
tMRS::~tMRS() {
}

// expects and removes char x at start of string rest, skipping (and removing)
// whitespace before and after x. Reports an error and exits if x is missing.
void tMRS::parseChar(char x, string &rest) {
  if (debug)
    cerr << "parsing " << x << endl;
  skipSpaces(rest);
  if (rest.empty()) {
    cerr << "Reached end of string while looking for " << x
         << ". Terminating" << endl;
    exit(1);
  }
  if (rest[0] != x) {
    cerr << "Ill-formed MRS. Expecting " << x << " and got \"" << rest << "\""
         << ". Terminating" << endl;
    exit(1);
  }
  rest.erase(0, 1);
  skipSpaces(rest);
}

// expects and removes the literal id at the very start of rest (no leading
// whitespace is skipped), then removes any whitespace that follows it.
void tMRS::parseID(const string id, string &rest) {
  if (debug)
    cerr << "parsing " << id << endl;
  if (rest.compare(0, id.length(), id) != 0) {
    cerr << "Ill-formed MRS: no " << id << " at \"" << rest << "\"" << endl;
    exit(1);
  }
  rest.erase(0, id.length());
  skipSpaces(rest);
}

// Reads a variable from the start of rest: a type starting with a letter and
// running up to the first digit, followed by one or more digits as the id.
// Trailing whitespace is removed. Exits with a diagnostic on malformed input.
Var tMRS::parseVar(string &rest) {
  if (debug)
    cerr << "parsing Var at " << rest << endl;

  if (!isAlphaChar(frontChar(rest))) {
    cerr << "Couldn't find a variable at \"" << rest << "\"" << endl;
    exit(1);
  }

  // variable type: first letter plus everything up to whitespace or a digit
  size_t n = 1;
  while (n < rest.size() && !isSpaceChar(rest[n]) && !isDigitChar(rest[n]))
    ++n;
  VarType vt = rest.substr(0, n);
  rest.erase(0, n);

  if (!isDigitChar(frontChar(rest))) {
    cerr << "Ill-formed variable at \"" << rest << "\"" << endl;
    exit(1);
  }

  // variable id: the run of digits
  n = 0;
  while (n < rest.size() && isDigitChar(rest[n]))
    ++n;
  VarID vid = rest.substr(0, n);
  rest.erase(0, n);

  skipSpaces(rest);
  return Var(vt, vid);
}

// Reads a bracketed property list "[ type FEAT: value ... ]" from rest and
// returns it as a newly allocated Props (feature -> value). The variable type
// is skipped; each value runs up to the next whitespace.
Props *tMRS::parseProps(string &rest) {
  if (debug)
    cerr << "parsing Props at " << rest << endl;
  parseChar('[', rest);
  if (!isAlphaChar(frontChar(rest))) {
    cerr << "Ill-formed MRS. Expecting variable type at \"" << rest
         << "\". Terminating." << endl;
    exit(1);
  }

  // skip the variable type
  size_t n = 1;
  while (n < rest.size() && !isSpaceChar(rest[n]) && !isDigitChar(rest[n]))
    ++n;
  rest.erase(0, n);
  skipSpaces(rest);

  Props *p = new Props;
  for (string feat = readFeature(rest); !feat.empty();
       feat = readFeature(rest)) {
    n = 0;
    while (n < rest.size() && isGraphChar(rest[n]))
      ++n;
    (*p)[feat] = rest.substr(0, n);
    rest.erase(0, n);
    skipSpaces(rest);
  }
  parseChar(']', rest);
  return p;
}

// Reads a feature name terminated by ':' from the start of rest and returns
// it, removing the name, the colon and any following whitespace. Returns the
// empty string when no colon is found before whitespace, '[' or ']'; the
// characters scanned up to that point are still removed from rest.
string tMRS::readFeature(string &rest) {
  if (debug)
    cerr << "parsing Feature at " << rest << endl;
  size_t n = 0;
  while (n < rest.size() && isGraphChar(rest[n]) && rest[n] != '[' &&
         rest[n] != ']') {
    if (rest[n] == ':') {
      const string feature = rest.substr(0, n);
      rest.erase(0, n + 1);
      skipSpaces(rest);
      return feature;
    }
    ++n;
  }
  rest.erase(0, n);
  return string();
}

// Reads one "[ name<span> FEAT: value ... ]" relation from rest, appends it
// to rels and updates the arg0s, arg0ToRel and lblToArg0 indexes.
void tMRS::parseRel(string &rest) {
  if (debug)
    cerr << "parsing Rel at " << rest << endl;
  parseChar('[', rest);

  Rel *rel = new Rel;
  rel->type = REAL;
  rel->roles = new Roles;
  const pair<string, string> relname = parseName(rest);
  rel->name = relname.first;
  rel->span = relname.second;

  for (string id = readFeature(rest); !id.empty(); id = readFeature(rest)) {
    if (id == "LBL") {
      rel->lbl = parseVar(rest);
    } else if (id == "ARG0") {
      rel->arg0 = parseVar(rest);
      if (frontChar(rest) == '[')
        arg0s[rel->arg0] = parseProps(rest);
      else if (arg0s.count(rel->arg0) == 0)
        arg0s[rel->arg0] = NULL; // some ARG0s don't have any props
    } else if (id == "CARG") {
      rel->carg = parseString(rest);
    } else {
      const Var role = parseVar(rest);
      (*(rel->roles))[id] = role;
      if (id == "RSTR")
        rel->type = QUANT; // a rel with an RSTR role is a quantifier
      if (frontChar(rest) == '[')
        arg0s[role] = parseProps(rest);
    }
  }
  parseChar(']', rest);

  rels.push_back(rel);
  if (rel->type != QUANT)
    arg0ToRel[rel->arg0] = rels.size() - 1;

  if (lblToArg0.count(rel->lbl) == 0) {
    lblToArg0[rel->lbl] = rel->arg0;
    return;
  }

  // already seen this label, decide which is the representative rel: the new
  // rel takes over when the current representative has it as its ARG1
  const Var current = lblToArg0[rel->lbl];
  if (arg0ToRel.count(current)) {
    Roles *roles = rels[arg0ToRel[current]]->roles;
    if (roles->count("ARG1") && (*roles)["ARG1"] == rel->arg0)
      lblToArg0[rel->lbl] = rel->arg0;
  }
}

// Reads a CARG value from rest. A double-quoted string yields its contents;
// a *-quoted string and the bare word "string" are consumed and yield the
// empty string. Trailing whitespace is removed.
string tMRS::parseString(string &rest) {
  if (debug)
    cerr << "parsing String at " << rest << endl;
  string carg;
  const char first = frontChar(rest);

  if (first == '*') {
    rest.erase(0, 1);
    rest.erase(0, rest.find('*')); // drops everything if there is no closing *
    if (frontChar(rest) != '*') {
      cerr << "Ill-formed MRS. Expecting *-quoted string at \"" << rest
           << "\". Terminating." << endl;
      exit(1);
    }
    rest.erase(0, 1);
    skipSpaces(rest);
    return carg;
  }

  if (first != '"') {
    if (rest.compare(0, 6, "string") == 0) {
      rest.erase(0, 6);
      skipSpaces(rest);
      return carg;
    }
    cerr << "Ill-formed MRS. Expecting quoted string at \"" << rest
         << "\". Terminating." << endl;
    exit(1);
  }

  // double-quoted string
  // handle escapes when i find out how they are escaped
  rest.erase(0, 1);
  const size_t close = rest.find('"');
  if (close == string::npos) {
    cerr << "Ill-formed MRS. Unterminated quoted string at \"" << rest
         << "\". Terminating." << endl;
    exit(1);
  }
  carg = rest.substr(0, close);
  rest.erase(0, close + 1);
  skipSpaces(rest);
  return carg;
}

// Reads a relation name and its optional "<...>" span from rest and returns
// them as (name, span). The name may be double-quoted; a trailing "_rel" is
// stripped. When no span follows, the span is "<-1:-1>".
pair<string, string> tMRS::parseName(string &rest) {
  if (debug)
    cerr << "parsing Name at " << rest << endl;
  string name;
  string span;

  if (frontChar(rest) == '"') {
    rest.erase(0, 1);
    // take characters up to (not including) the first unescaped closing
    // quote; whitespace ends the scan early
    size_t n = 0;
    while (n < rest.size() && !isSpaceChar(rest[n])) {
      const bool lastBeforeQuote =
          n + 1 < rest.size() && rest[n + 1] == '"' && rest[n] != '\\';
      ++n;
      if (lastBeforeQuote)
        break;
    }
    name = rest.substr(0, n);
    rest.erase(0, n);
    // we should have stopped with rest[0] == '"'
    if (frontChar(rest) != '"') {
      cerr << "Unterminated quoted string at \"" << rest
           << "\". Terminating." << endl;
      exit(1);
    }
    rest.erase(0, 1);
  } else {
    size_t n = 0;
    while (n < rest.size() && !isSpaceChar(rest[n]) && rest[n] != '<')
      ++n;
    name = rest.substr(0, n);
    rest.erase(0, n);
  }

  if (name.length() > 4 && name.compare(name.length() - 4, 4, "_rel") == 0)
    name.erase(name.length() - 4, 4);

  if (frontChar(rest) == '<') {
    size_t n = 0;
    while (n < rest.size() && isGraphChar(rest[n]) && rest[n] != '>')
      ++n;
    span = rest.substr(0, n);
    rest.erase(0, n);
    if (frontChar(rest) != '>') {
      cerr << "Unterminated span at \"" << rest << "\". Terminating." << endl;
      exit(1);
    }
    span += '>';
    rest.erase(0, 1);
  } else {
    // no span
    span = string("<-1:-1>");
  }

  skipSpaces(rest);
  return pair<string, string>(name, span);
}

// Reads one "var qeq var" constraint from rest into hcons. Returns false,
// consuming nothing, when rest does not start with a letter.
bool tMRS::parseHCONS(string &rest) {
  if (debug)
    cerr << "parsing HCONS at " << rest << endl;
  if (!isAlphaChar(frontChar(rest)))
    return false;
  const Var lhdl = parseVar(rest);
  parseID("qeq", rest);
  const Var rhdl = parseVar(rest);
  hcons[lhdl] = rhdl;
  return true;
}

// Prints every NAME, ARG and PROP triple to cout, one per line.
void tMRS::printTriples() {
  for (size_t i = 0; i < rels.size(); ++i) {
    const Rel *rel = rels[i];
    cout << rel->name << rel->span << " NAME " << rel->name;
    if (rel->carg != "")
      cout << "(" << rel->carg << ")";
    cout << endl;
  }

  for (size_t i = 0; i < argtriples.size(); ++i) {
    const ATriple &triple = argtriples[i];
    printRel(rels[triple.first]);
    cout << " " << triple.second.first << " ";
    printRel(rels[triple.second.second]);
    cout << endl;
  }

  for (size_t i = 0; i < proptriples.size(); ++i) {
    const PTriple &triple = proptriples[i];
    printRel(rels[triple.first]);
    cout << " " << triple.second.first << " ";
    cout << triple.second.second;
    cout << endl;
  }
}

// Prints the triple counts to cout: the four summary counters first, then
// every other counter in map order.
void tMRS::printCounts() {
  static const char *const summary[] = {"ALL", "ARGS", "PROPS", "NAMES"};
  const size_t nsummary = sizeof(summary) / sizeof(summary[0]);

  for (size_t i = 0; i < nsummary; ++i)
    cout << setw(15) << summary[i] << "\t" << counts[summary[i]] << endl;

  for (map<string, int>::iterator it = counts.begin(); it != counts.end();
       ++it) {
    bool isSummary = false;
    for (size_t i = 0; i < nsummary && !isSummary; ++i)
      isSummary = (it->first == summary[i]);
    if (!isSummary)
      cout << setw(15) << it->first << "\t" << it->second << endl;
  }
}

// compares the triples of two MRS and returns the counts of matching triples
// and a list of the unmatched triples. Triples of this MRS with no match are
// recorded with direction "<", unmatched triples of test with ">". Each test
// triple can match at most one triple of this MRS.
pair<map<string, int>, vector<Unmatched> > tMRS::compareTriples(tMRS &test) {
  map<string, int> correct;
  vector<Unmatched> errors;
  correct["ALL"] = 0;
  correct["NAMES"] = 0;
  correct["ARGS"] = 0;
  correct["PROPS"] = 0;
  set<int> matched; // indexes of test triples that have been matched

  // NAME triples: same span and same name
  for (size_t i = 0; i < rels.size(); ++i) {
    const Rel *rel = rels[i];
    bool found = false;
    if (test.spanToRel.count(rel->span)) {
      for (size_t k = 0; !found && k < test.spanToRel[rel->span].size();
           ++k) {
        const int cand = test.spanToRel[rel->span][k];
        if (matched.count(cand))
          continue; // already matched this one
        if (test.rels[cand]->name == rel->name) {
          correct["ALL"]++;
          correct["NAMES"]++;
          matched.insert(cand);
          found = true;
        }
      }
    }
    if (!found) {
      Unmatched t("<", relLabel(rel), rel->span, "NAME", rel->name);
      errors.push_back(t);
    }
  }
  // save unmatched test NAME triples
  for (size_t i = 0; i < test.rels.size(); ++i) {
    if (matched.count(static_cast<int>(i)) == 0) {
      const Rel *rel = test.rels[i];
      Unmatched t(">", relLabel(rel), rel->span, "NAME", rel->name);
      errors.push_back(t);
    }
  }
  matched.clear();

  // ARG triples: same source span, same role, same target span
  for (size_t i = 0; i < argtriples.size(); ++i) {
    const ATriple &triple = argtriples[i];
    const Rel *from = rels[triple.first];
    const Rel *to = rels[triple.second.second];
    bool found = false;
    if (test.spanToATriple.count(from->span)) {
      for (size_t k = 0; !found && k < test.spanToATriple[from->span].size();
           ++k) {
        const int cand = test.spanToATriple[from->span][k];
        if (matched.count(cand))
          continue; // already matched this triple
        const ATriple &other = test.argtriples[cand];
        if (other.second.first == triple.second.first &&
            test.rels[other.second.second]->span == to->span) {
          correct["ALL"]++;
          correct["ARGS"]++;
          correct[triple.second.first]++;
          matched.insert(cand);
          found = true;
        }
      }
    }
    if (!found) {
      Unmatched t("<", relLabel(from), from->span, triple.second.first,
                  relLabel(to), to->span);
      errors.push_back(t);
    }
  }
  // save unmatched test ARG triples
  for (size_t i = 0; i < test.argtriples.size(); ++i) {
    if (matched.count(static_cast<int>(i)) == 0) {
      const ATriple &triple = test.argtriples[i];
      const Rel *from = test.rels[triple.first];
      const Rel *to = test.rels[triple.second.second];
      Unmatched t(">", relLabel(from), from->span, triple.second.first,
                  relLabel(to), to->span);
      errors.push_back(t);
    }
  }
  matched.clear();

  // PROP triples: same span, same feature, same value
  for (size_t i = 0; i < proptriples.size(); ++i) {
    const PTriple &triple = proptriples[i];
    const Rel *rel = rels[triple.first];
    bool found = false;
    if (test.spanToPTriple.count(rel->span)) {
      for (size_t k = 0; !found && k < test.spanToPTriple[rel->span].size();
           ++k) {
        const int cand = test.spanToPTriple[rel->span][k];
        if (matched.count(cand))
          continue; // already matched this triple
        const PTriple &other = test.proptriples[cand];
        if (other.second.first == triple.second.first &&
            other.second.second == triple.second.second) {
          correct["ALL"]++;
          correct["PROPS"]++;
          correct[triple.second.first]++;
          matched.insert(cand);
          found = true;
        }
      }
    }
    if (!found) {
      Unmatched t("<", relLabel(rel), rel->span, triple.second.first,
                  triple.second.second);
      errors.push_back(t);
    }
  }
  // save unmatched test PROP triples
  for (size_t i = 0; i < test.proptriples.size(); ++i) {
    if (matched.count(static_cast<int>(i)) == 0) {
      const PTriple &triple = test.proptriples[i];
      const Rel *rel = test.rels[triple.first];
      Unmatched t(">", relLabel(rel), rel->span, triple.second.first,
                  triple.second.second);
      errors.push_back(t);
    }
  }
  matched.clear();

  return pair<map<string, int>, vector<Unmatched> >(correct, errors);
}