#include #include #include #include "profile.h" #include "mrs.h" #include "eds.h" #include "mrs-reader.h" #include "mrs-errors.h" using namespace std; void parse_options(int argc, char **argv, string *gprof, string *tprof, bool *ignore, bool *suppresstotal, bool *raw, bool *ignoreroot, bool *verbose, tIid *itemno); double fscore(int g, int t, int c); pair,string> finishItem(Profile &test, tIid item, vector goldmrs, pair,string> lastline, map > &totals, bool raw, bool ignore=false, bool ignoreroot=false, bool verbose=false); int main(int argc, char **argv) { string gprofile, tprofile; bool ignore, suppresstotal, raw, ignoreroot, verbose; tIid itemno; parse_options(argc, argv, &gprofile, &tprofile, &ignore, &suppresstotal, &raw, &ignoreroot, &verbose, &itemno); Profile gold(gprofile); Profile test(tprofile); mrs::SimpleMrsReader reader; vector goldmrs; pair, string> gres = gold.getResult(MRS); pair, string> lastline = pair, string>(pair(-1,-1),string()); tIid item = -1; map > totals; totals["ALL"] = map(); totals["ALL"]['G'] = 0; totals["ALL"]['T'] = 0; totals["ALL"]['C'] = 0; totals["A"] = map(); totals["A"]['G'] = 0; totals["A"]['T'] = 0; totals["A"]['C'] = 0; totals["N"] = map(); totals["N"]['G'] = 0; totals["N"]['T'] = 0; totals["N"]['C'] = 0; totals["P"] = map(); totals["P"]['G'] = 0; totals["P"]['T'] = 0; totals["P"]['C'] = 0; while (gres.first.first >= 0) { //still returning results if (itemno > -1 && gres.first.first != itemno) { //looking for a specific item gres = gold.getResult(MRS); continue; // and not there yet } if (gres.first.first != item) { //new item if (item != -1 && goldmrs.size() > 0) { //finish last item if (gold.numGold(item) == 1) { lastline = finishItem(test, item, goldmrs, lastline, totals, raw, ignore, ignoreroot, verbose); } goldmrs.clear(); } } item = gres.first.first; if (gold.numGold(item) == 1 && !gres.second.empty()) { mrs::tMrs *mrs = reader.readMrs(gres.second); goldmrs.push_back(mrs); } gres = gold.getResult(MRS); } if (item != -1 && goldmrs.size() > 0) { if (gold.numGold(item) == 1) { lastline = finishItem(test, item, goldmrs, lastline, totals, raw, ignore, ignoreroot, verbose); } goldmrs.clear(); } if (!suppresstotal) { cout << "ALL: " << fscore(totals["ALL"]['G'], totals["ALL"]['T'], totals["ALL"]['C']) << ", A: " << fscore(totals["A"]['G'], totals["A"]['T'], totals["A"]['C']) << ", N: " << fscore(totals["N"]['G'], totals["N"]['T'], totals["N"]['C']) << ", P: " << fscore(totals["P"]['G'], totals["P"]['T'], totals["P"]['C']) <0) precision = (double)c/t; if (g>0) recall = (double)c/g; if (precision > 0 && recall > 0) { fscore = (2*precision*recall)/(precision+recall); } return fscore; } pair,string> finishItem(Profile &test, tIid item, vectorgoldmrs, pair,string> lastline, map > &totals, bool raw, bool ignore, bool ignoreroot, bool verbose) { vector categories; categories.push_back("ALL"); categories.push_back("A"); categories.push_back("N"); categories.push_back("P"); pair,string> tres = lastline; mrs::tMrs *testmrs = NULL; mrs::tEds *eds_test = NULL; mrs::SimpleMrsReader reader; vector results; if (test.getReadings(item) > 0) { if (tres.first.first == -1) tres = test.getResult(MRS); while (tres.first.first != -1 && tres.first.first < item) tres = test.getResult(MRS); if (tres.first.first != item) { cerr << "Item " << item << " is not in the test profile." << endl; } else { if (tres.second.empty()) { cerr << "No MRS for item " << item << " in test profile." << endl; } else { try { testmrs = reader.readMrs(tres.second); } catch (tError &e) { cerr << "Failed to parse with error: \"" << e.getMessage() << "\"" << endl; } } } } if (!ignore || testmrs != NULL) { if (testmrs != NULL) eds_test = new mrs::tEds(testmrs); int bestgold = -1; double bestscore = -1; for (vector::iterator git = goldmrs.begin(); git != goldmrs.end(); ++git) { mrs::tEds *eds_gold = new mrs::tEds(*git); mrs::tEdsComparison *result = eds_gold->compare_triples(eds_test, "ALL", ignoreroot); results.push_back(result); if (result->score > bestscore) { bestgold = results.size()-1; bestscore = result->score; } delete eds_gold; } int lowcount; if (testmrs == NULL) { //find smallest gold for (int i=0; i < results.size(); ++i) { if (i == 0 || results[i]->totalA["ALL"] < lowcount) { bestgold = i; lowcount = results[i]->totalA["ALL"]; } } } mrs::tEdsComparison *res = results[bestgold]; if (verbose) { // if (testmrs == NULL) { // cout << item << " unparsed" << endl; // } else { if (!res->unmatchedA.empty() || !res->unmatchedB.empty()) { cerr << "Item: " << test.getItem(item) << endl; cout << "Unmatched Triples in " << item << endl; for (vector::iterator it = res->unmatchedA.begin(); it != res->unmatchedA.end(); ++it) cout << "< " << *it << endl; for (vector::iterator it = res->unmatchedB.begin(); it != res->unmatchedB.end(); ++it) cout << "> " << *it << endl; } // } } if (raw) cout << item; for (vector::iterator it = categories.begin(); it != categories.end(); ++it) { if (raw) cout << "@" << (res->totalA.count(*it)>0?res->totalA[*it]:0); if (res->totalA.count(*it) > 0) { if (totals.count(*it) == 0) totals[*it] = map(); if (totals[*it].count('G') == 0) totals[*it]['G'] = 0; totals[*it]['G'] += res->totalA[*it]; } } for (vector::iterator it = categories.begin(); it != categories.end(); ++it) { if (raw) cout << "@" << (res->totalB.count(*it)>0?res->totalB[*it]:0); if (res->totalB.count(*it) > 0) { if (totals.count(*it) == 0) totals[*it] = map(); if (totals[*it].count('T') == 0) totals[*it]['T'] = 0; totals[*it]['T'] += res->totalB[*it]; } } for (vector::iterator it = categories.begin(); it != categories.end(); ++it) { if (raw) cout << "@" << (res->totalM.count(*it)>0?res->totalM[*it]:0); if (res->totalM.count(*it) > 0) { if (totals.count(*it) == 0) totals[*it] = map(); if (totals[*it].count('C') == 0) totals[*it]['C'] = 0; totals[*it]['C'] += res->totalM[*it]; } } if (raw) cout << endl; if (testmrs != NULL) { delete eds_test; delete testmrs; } } for (vector::iterator rit = results.begin(); rit != results.end(); ++rit) { delete *rit; } for (vector::iterator git = goldmrs.begin(); git != goldmrs.end(); ++git) { delete *git; } return tres; } void parse_options(int argc, char **argv, string *gprof, string *tprof, bool *ignore, bool *suppresstotal, bool *raw, bool *ignoreroot, bool *verbose, tIid *itemno) { namespace po = boost::program_options; po::options_description visible("Options"); visible.add_options() ("help,h", "This usage information.") ("ignore,i", "Ignore gold triples where the test parse failed.") ("suppress,s", "Suppress printing of total F-scores.") ("counts,c", "Print raw counts per item.") ("root,r", "Ignore root triples in comparison.") ("verbose,v", "Print unmatched triples.") ("analysis,a", po::value(itemno)->default_value(-1), "Select a single item to evaluate (-1 for all items).") ; po::options_description hidden("Hidden options"); hidden.add_options() ("goldprof", po::value(gprof), "gold profile") ("testprof", po::value(tprof), "test profile") ; po::options_description cmd_line ("Command line options"); cmd_line.add(visible).add(hidden); po::positional_options_description p; p.add("goldprof", 1).add("testprof", 1); po::variables_map vm; try { po::store(po::command_line_parser(argc, argv). options(cmd_line).positional(p).run(), vm); notify(vm); if (vm.count("help")) { cout << "Usage: " << argv[0] << " [options] " << "gold-profile test-profile" << endl; cout << visible << endl; exit(0); } if (!vm.count("goldprof") || !vm.count("testprof")) { cerr << "Insufficient arguments given." << endl; cerr << "Usage: " << argv[0] << " [options] " << "gold-profile test-profile" << endl; cerr << visible << endl; exit(1); } if (vm.count("ignore")) *ignore = true; else *ignore = false; if (vm.count("suppress")) *suppresstotal = true; else *suppresstotal = false; if (vm.count("counts")) *raw = true; else *raw = false; if (vm.count("root")) *ignoreroot = true; else *ignoreroot = false; if (vm.count("verbose")) *verbose = true; else *verbose = false; } catch (po::error& e ) { cerr << "Error: " << e.what() << endl; cerr << "Usage: " << argv[0] << " [options] " << "gold-profile test-profile" << endl; cerr << visible << endl; exit(1); } }