package mpqareader;
import se.lth.cs.nlp.nlputils.core.*;
import se.lth.cs.nlp.nlputils.ml_long.*;
import se.lth.cs.nlp.nlputils.depgraph.*;
import se.lth.cs.nlp.nlputils.framenet.*;
import se.lth.cs.nlp.depsrl.format.*;
import srlpostprocess.*; // build path cycle
///import tkreranker.Linearizer;
import java.io.*;
import java.util.*;

/**
 * Bag-of-words sentence classifier for subjectivity / polarity intensity on
 * MPQA-style data. Sentences are mapped to sparse feature vectors
 * ({@code representSentence}) and classified in two stages: a binary
 * subjective/objective classifier (cl1) followed by an intensity classifier
 * (cl2) applied only to subjective sentences. Entry points: {@code -cv}
 * (cross-validation + optional final model dump) and {@code -dirs}
 * (write sentences into per-polarity directories).
 *
 * NOTE(review): this file appears to have suffered text-extraction damage:
 * generic type parameters look stripped everywhere (e.g. "ArrayList",
 * "Triple>"), and in three places a span between '&lt;' and '&gt;' was lost
 * entirely (each "startsWith(\" sen = readSentence(br);" fragment below).
 * The code cannot compile as-is; the damaged fragments are preserved
 * verbatim and flagged inline.
 */
public class BOWPolSubjSentClassifier {

    // --- Feature / behavior toggles (compile-time configuration). ---
    static final boolean SET = false;                       // binarize feature values to 1.0
    static final boolean NORMALIZE = true; //true; //true;  // L2-normalize feature vectors
    private static final boolean REMOVE_STOP_WORDS = false;
    private static final boolean DUMP_SVMLIGHT = false;     // NOTE(review): never read in this file
    private static final String SVMLIGHT_FILE = "svmlight.dump"; // NOTE(review): never read in this file
    private static final boolean USE_CHAR_TRIGRAMS = false;
    private static final boolean USE_FRAMES = false;
    private static final boolean USE_WB_FEATURES = false;
    private static final boolean REPLACE_LEMMAS = false;
    private static final boolean USE_UNIGRAMS = true;
    private static final boolean USE_POS = false;
    private static final boolean USE_BIGRAMS = false;
    private static final boolean USE_POS_BIGRAMS = false;
    private static final boolean USE_PRED_SENSES = false;
    private static final boolean USE_VO_TUPLES = false;
    private static final boolean USE_GFS = false;
    private static final boolean USE_PA_TUPLES = false;
    private static final boolean USE_RAW_PA_TUPLES = false;
    // private static final boolean USE_UNIGRAMS = true;
    // private static final boolean USE_POS = true;
    // private static final boolean USE_BIGRAMS = true;
    // private static final boolean USE_POS_BIGRAMS = true;
    // private static final boolean USE_PRED_SENSES = true;
    // private static final boolean USE_VO_TUPLES = true;
    // private static final boolean USE_GFS = true;
    // private static final boolean USE_PA_TUPLES = true;
    // private static final boolean USE_RAW_PA_TUPLES = false;
    private static final boolean USE_PREV_CLASS = false;    // NOTE(review): never read in this file
    static final boolean USE_SUBJLEX = true; //true;
    private static final boolean USE_JUSSI_FEATURES = false;
    // private static final boolean USE_UNIGRAMS = true;
    // private static final boolean USE_POS = false;
    // private static final boolean USE_BIGRAMS = false;
    // private static final boolean USE_POS_BIGRAMS = false;
    // private static final boolean USE_PRED_SENSES = false;
    // private static final boolean USE_VO_TUPLES = false;
    // private static final boolean USE_GFS = false;
    // private static final boolean USE_PA_TUPLES = false;
    // private static final boolean USE_RAW_PA_TUPLES = false;
    //private static boolean FIXED_TESTSET = true;

    /**
     * Reads one sentence as a list of lines (one token per line) up to the
     * next blank line.
     *
     * NOTE(review): if the stream ends without a terminating blank line,
     * readLine() returns null and the .trim() call throws a
     * NullPointerException — assumes well-formed input; confirm with callers.
     */
    private static ArrayList readSentence(BufferedReader br) throws IOException {
        ArrayList out = new ArrayList();
        String line = br.readLine().trim();
        while(!line.isEmpty()) {
            out.add(line);
            line = br.readLine().trim();
        }
        return out;
    }

    // Small English stop-word list, consulted only when REMOVE_STOP_WORDS is on.
    private static final String[] STOP_WORDS = new String[] {
        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
        "has", "he", "in", "is", "it", "its", "of", "on", "that", "the",
        "to", "was", "were", "will", "with",
    };
    private static HashSet STOP_WORD_SET = new HashSet();
    static {
        STOP_WORD_SET.addAll(Arrays.asList(STOP_WORDS));
    }

    /**
     * Builds the sparse feature vector for one sentence.
     *
     * Mutates {@code sen} in place (lowercases tokens and maps digits 1-9 to
     * 0) before feature extraction. Which feature families are emitted is
     * controlled by the USE_* flags above; every feature is encoded through
     * {@code enc} and stored with value 1.0 (duplicates overwrite via
     * sv.put). The vector is sorted and trimmed before being returned.
     *
     * NOTE(review): the parameter list looks damaged — "ArrayList sen" and
     * "Triple> g" presumably were ArrayList&lt;String&gt; and a Triple whose
     * first element is a DepGraph and whose third is a list of PAStructure
     * (based on the accesses g.first.nodes / g.third below) — TODO confirm.
     */
    static SparseVector representSentence(ArrayList sen, Triple> g,
            LexicalDB propBank, LexicalDB nomBank, FrameNet framenet,
            SubjectivityLexicon subjLex, SymbolEncoder enc) throws IOException {
        // Normalize tokens in place: lowercase, digits 1-9 -> 0.
        for(int i = 0; i < sen.size(); i++) {
            String s = sen.get(i);
            s = s.toLowerCase();
            s = s.replaceAll("[1-9]", "0");
            sen.set(i, s);
        }
        SparseVector sv = new SparseVector();
        if(USE_CHAR_TRIGRAMS) {
            // Character n-grams over the concatenated sentence, padded with "##".
            StringBuilder sb = new StringBuilder();
            for(int i = 0; i < sen.size(); i++) {
                sb.append(sen.get(i));
            }
            String s = sb.toString().trim();
            s = "##" + s + "##";
            int l = s.length();
            for(int i = 0; i < l - 2; i++) {
                // NOTE(review): substring(i, i + 2) yields 2 characters, i.e.
                // bigrams despite the "trigram" name; substring(i, i + 3) was
                // possibly intended — confirm before changing.
                String t = "t_" + s.substring(i, i + 2);
                sv.put(enc.encode(t), 1.0);
            }
        }
        if(USE_FRAMES) {
            // FrameNet frame features: map the POS tag to a FrameNet POS key
            // (n/v/a/adv), then emit one "FN_<frame>" feature per matching
            // lexical unit.
            for(int i = 0; i < sen.size(); i++) {
                String word = sen.get(i);
                String lemma = g.first.nodes[i+1].lemma;   // nodes[0] is the root, hence i+1
                String pos = g.first.nodes[i+1].pos;
                if(lemma == null || lemma.equals("_"))
                    lemma = word;                          // fall back to the word form
                if(pos.startsWith("N")) pos = "n";
                else if(pos.startsWith("V")) pos = "v";
                else if(pos.startsWith("JJ")) pos = "a";
                else if(pos.startsWith("RB")) pos = "adv";
                else pos = null;                           // other POS: no lookup
                if(pos != null) {
                    Set lus = framenet.getLUsByHeadPos(lemma, pos);
                    if(lus != null)
                        for(LexUnit lu: lus)
                            sv.put(enc.encode("FN_" + lu.frame), 1.0);
                }
            }
        }
        if(USE_WB_FEATURES) {
            // POS-class indicator features (modals except "will", adverbs
            // except "not", pronouns, adjectives, cardinal numbers).
            for(int i = 0; i < sen.size(); i++) {
                String word = sen.get(i);
                String pos = g.first.nodes[i+1].pos;
                if(pos.equals("MD") && !word.equals("will"))
                    sv.put(enc.encode("WB_MD"), 1.0);
                else if(pos.startsWith("RB") && !word.equals("not"))
                    sv.put(enc.encode("WB_RB"), 1.0);
                else if(pos.startsWith("PRP"))
                    sv.put(enc.encode("WB_PRP"), 1.0);
                else if(pos.startsWith("JJ"))
                    sv.put(enc.encode("WB_JJ"), 1.0);
                else if(pos.startsWith("CD"))
                    sv.put(enc.encode("WB_CD"), 1.0);
            }
        }
        // unigrams
        if(USE_UNIGRAMS)
            for(int i = 0; i < sen.size(); i++) {
                String s = sen.get(i);
                if(REPLACE_LEMMAS) {
                    String l = g.first.nodes[i+1].lemma;
                    if(l != null && !l.equals("_")) s = l;
                }
                // NOTE(review): encodes sen.get(i), not s — so the
                // REPLACE_LEMMAS substitution above only affects the
                // stop-word test, never the emitted feature. Possibly a bug.
                if(!REMOVE_STOP_WORDS || !STOP_WORD_SET.contains(s))
                    sv.put(enc.encode(sen.get(i)), 1.0);
            }
        if(USE_POS)
            for(int i = 0; i < sen.size(); i++)
                sv.put(enc.encode(g.first.nodes[i+1].pos), 1.0);
        // bigrams
        if(USE_BIGRAMS)
            for(int i = 0; i < sen.size() - 1; i++) {
                String s = sen.get(i) + "_" + sen.get(i + 1);
                sv.put(enc.encode(s), 1.0);
            }
        if(USE_POS_BIGRAMS)
            // Mixed word/POS bigrams: word_i + POS_{i+1} and POS_i + word_{i+1}.
            for(int i = 1; i < g.first.nodes.length - 1; i++) {
                String w1 = sen.get(i-1);
                String w2 = sen.get(i);
                String p1 = g.first.nodes[i].pos;
                String p2 = g.first.nodes[i+1].pos;
                sv.put(enc.encode(w1 + "_" + p2), 1.0);
                sv.put(enc.encode(p1 + "_" + w2), 1.0);
            }
        if(USE_SUBJLEX) {
            // One feature per token that matches a subjectivity-lexicon clue.
            for(int i = 1; i < g.first.nodes.length; i++) {
                String w = sen.get(i-1);
                String p = g.first.nodes[i].pos;
                String l = g.first.nodes[i].lemma;
                String slClue = subjLex.lookup(w, p, l);
                if(slClue != null) {
                    //System.err.println(l + "/" + p + ": slClue = " + slClue);
                    sv.put(enc.encode(slClue), 1.0);
                }
            }
        }
        // predicate lemmas
        if(USE_PRED_SENSES)
            for(PAStructure pa: g.third)
                sv.put(enc.encode(pa.lemma), 1.0);
        // verb--object tuples
        if(USE_VO_TUPLES) {
            DepGraph dg = g.first;
            for(int i = 1; i < dg.nodes.length; i++) {
                DepNode n = dg.nodes[i];
                DepNode p = n.parents[0];
                if(n.relations[0].equals("OBJ")) {
                    String s = p.word + "_OBJ_" + n.word;
                    s = s.toLowerCase();
                    sv.put(enc.encode(s), 1.0);
                }
                /*else if(n.relations[0].equals("SBJ")) {
                    String s = p.word + "_SBJ_" + n.word;
                    s = s.toLowerCase();
                    sv.put(enc.encode(s), 1.0);
                }*/
            }
        }
        // syntactic functions
        if(USE_GFS) {
            DepGraph dg = g.first;
            for(int i = 1; i < dg.nodes.length; i++)
                sv.put(enc.encode(dg.nodes[i].relations[0]), 1.0);
        }
        if(USE_RAW_PA_TUPLES)
            // Raw predicate--argument tuples straight from the SRL output,
            // restricted to core arguments A0-A2.
            for(PAStructure pa: g.third) {
                for(int i = 0; i < pa.args.size(); i++) {
                    String l = pa.argLabels.get(i);
                    DepNode a = pa.args.get(i);
                    if(l.matches("A0|A1|A2")) {
                        String s = pa.lemma + "_" + l + "_" + a.word;
                        s = s.toLowerCase();
                        sv.put(enc.encode(s), 1.0);
                    }
                }
            }
        // predicate--argument tuples
        if(USE_PA_TUPLES) {
            // Post-processed semantic graph: emit pred_rel_arg and pred_rel
            // features for core arguments A0-A3.
            ArrayList ns = SRLPostProcess.processPAs(g.third, g.first, propBank, nomBank);
            for(SemNode n: ns) {
                if(!(n instanceof EventSemNode)) continue;
                EventSemNode en = (EventSemNode) n;
                for(ArgLink l: en.args) {
                    if(l.rid.matches("A0|A1|A2|A3")) {
                        String as;
                        if(l.arg instanceof EventSemNode)
                            as = ((EventSemNode) l.arg).lemma;
                        else
                            as = ((TokenSemNode) l.arg).word;
                        String ps = en.lemma;
                        String rel = l.rid;
                        /*if(l.vn != null) rel = l.vn; else rel = l.rid;*/
                        String tuple = ps + "_" + rel + "_" + as;
                        String tuple1 = en.lemma + "_" + rel;
                        //String tuple2 = rel + "_" + as;
                        sv.put(enc.encode(tuple), 1.0);
                        sv.put(enc.encode(tuple1), 1.0);
                        //sv.put(enc.encode(tuple2), 1.0);
                        /*String tuple0a = ps + "_" + as;
                        sv.put(enc.encode(tuple0a), 1.0);
                        String tuple0b = "_" + as;
                        sv.put(enc.encode(tuple0b), 1.0);*/
                    }
                }
            }
        }
        if(USE_JUSSI_FEATURES) {
            // tense shift
            // Fires when a past-tense verb governs (or is governed by, via
            // OBJ) a present-tense verb.
            for(int i = 1; i < g.first.nodes.length; i++) {
                DepNode n = g.first.nodes[i];
                DepNode p = n.parents[0];
                if(p.position == 0) continue;              // skip root attachments
                if(!n.relations[0].equals("OBJ")) continue;
                if(n.pos.equals("VBD") && p.pos.matches("VBP|VBZ")
                        || n.pos.matches("VBP|VBZ") && p.pos.equals("VBD")) {
                    sv.put(enc.encode("#tenseshift"), 1.0);
                }
            }
        }
        sv.sortIndices();
        sv.trim();
        return sv;
    }

    /**
     * Rescales each feature value by its inverse document frequency,
     * idf = log(N) - log(df), in place.
     *
     * NOTE(review): if a key has df == 0 (e.g. a test-set feature never seen
     * in training), Math.log(0) is -Infinity and poisons the value — confirm
     * callers guarantee df &gt; 0.
     */
    private static void weightIDF(SparseVector sv, int N, IntHistogram dfs) {
        for(int i = 0; i < sv.index; i++) {
            int df = dfs.getFrequency((int) sv.keys[i]);
            double idf = Math.log(N) - Math.log(df);
            sv.values[i] *= idf;
        }
    }

    /**
     * L2-normalizes the vector in place (divides by the Euclidean length).
     * NOTE(review): an all-zero vector gives sum == 0 and isum == Infinity.
     */
    static void normalize2(SparseVector sv) {
        double sum = 0;
        for(int i = 0; i < sv.index; i++)
            sum += sv.values[i] * sv.values[i];
        double isum = 1.0 / Math.sqrt(sum); // NB: earlier versions did not take the sqrt!!!
        for(int i = 0; i < sv.index; i++)
            sv.values[i] *= isum;
    }

    private static final boolean USE_POLARITY = false; // NOTE(review): never read in this file

    // Labels for the stage-1 (subjectivity) classifier.
    static final int SUBJ = 1, NO_SUBJ = 2;

    // Intensity scale for the stage-2 classifier.
    static final int NONE = 1;
    static final int LOW = 2;
    static final int MEDIUM = 3;
    static final int HIGH = 4;
    static final int EXTREME = 5;
    // we later map extreme to high

    /** Maps an intensity constant to its long name, or null if unknown. */
    public static String fromIntValue(int intValue) {
        switch(intValue) {
        case NONE: return "none";
        case LOW: return "low";
        case MEDIUM: return "medium";
        case HIGH: return "high";
        case EXTREME: return "extreme";
        }
        return null;
    }

    /** Maps an intensity constant to its one-letter code, or null if unknown. */
    public static String fromIntValueShort(int intValue) {
        switch(intValue) {
        case NONE: return "n";
        case LOW: return "l";
        case MEDIUM: return "m";
        case HIGH: return "h";
        case EXTREME: return "e";
        }
        return null;
    }

    /** Inverse of {@link #fromIntValue}; returns -1 for unknown strings. */
    public static int toIntValue(String intens) {
        if(intens.equals("none")) return NONE;
        if(intens.equals("low")) return LOW;
        if(intens.equals("medium")) return MEDIUM;
        if(intens.equals("high")) return HIGH;
        if(intens.equals("extreme")) return EXTREME;
        return -1;
    }

    /** Inverse of {@link #fromIntValueShort}; null means NONE, unknown -1. */
    public static int toIntValueShort(String intens) {
        if(intens == null) return NONE;
        if(intens.equals("n")) return NONE;
        if(intens.equals("l")) return LOW;
        if(intens.equals("m")) return MEDIUM;
        if(intens.equals("h")) return HIGH;
        if(intens.equals("e")) return EXTREME;
        return -1;
    }

    /**
     * Cross-validation driver ("-cv" mode).
     *
     * argv layout: [1] sentence file, [2] testing flag (must be false),
     * [3] test-set file list (must be "NONE"), [4] CoNLL-2008 syn/sem file,
     * [5] PropBank dir, [6] NomBank dir, [7] FrameNet file, [8] FrameNet
     * relation file, [9] subjectivity lexicon, [10] #folds, [11] idf flag,
     * [12] output file, [13] model name (or NONE/NULL to skip the final
     * model), [14-15] stage-1 algorithm name/args, [16-17] stage-2
     * algorithm name/args.
     *
     * Runs n-fold CV of the two-stage classifier, prints accuracy and P/R/F
     * stats per polarity, then (unless modelName is NONE) retrains on all
     * data and serializes encoder + both classifiers (+ lexicon) to
     * modelName. Exceptions are caught, printed, and exit(1).
     *
     * NOTE(review): rand, tsP, and out are created but never used (out is
     * also never closed); fold assignment is sequential, not shuffled.
     */
    public static void train_cv(String[] argv) {
        String fileName = argv[1];
        boolean testing = Boolean.parseBoolean(argv[2]);
        String testsetFilelist = argv[3];
        if(testing || !testsetFilelist.equals("NONE"))
            throw new RuntimeException("unsupported");
        String synsemFileName = argv[4];
        String pbDir = argv[5];
        String nbDir = argv[6];
        String fnFile = argv[7];
        String fnRelFile = argv[8];
        String subjLexFile = argv[9];
        int nfolds = Integer.parseInt(argv[10]);
        boolean idf = Boolean.parseBoolean(argv[11]);
        String outFile = argv[12];
        String modelName = argv[13];
        String algName1 = argv[14];
        String algArgs1 = argv[15];
        String algName2 = argv[16];
        String algArgs2 = argv[17];
        try {
            AlgorithmFactory.setVerbosity(0);
            ArrayList> ts = new ArrayList();
            ArrayList> tsP = new ArrayList();
            BufferedReader br = new BufferedReader(new FileReader(fileName));
            PrintWriter out = null;
            if(outFile != null)
                out = new PrintWriter(new FileWriter(outFile));
            Scanner sc = new Scanner(Util.openFileStream(synsemFileName));
            LexicalDB propBank = new LexicalDB(pbDir);
            LexicalDB nomBank = new LexicalDB(nbDir);
            FrameNet fn = null;
            if(USE_FRAMES)
                fn = new FrameNet(fnFile, fnRelFile);
            SubjectivityLexicon subjLex = new SubjectivityLexicon(subjLexFile);
            SymbolEncoder enc = new SymbolEncoder();
            // Read each annotated sentence, build its feature vector, and
            // pair it with its gold intensity annotation.
            while(true) {
                String line = br.readLine();
                if(line == null) break;
                // NOTE(review): text lost between '<' and '>' here — the
                // original presumably tested for an MPQA sentence tag and
                // declared isSubjective / intens and "ArrayList<String> sen".
                // Preserved verbatim; does not compile.
                if(line.startsWith(" sen = readSentence(br); //System.out.println(isSubjective + "\t" + sen);
                Triple> g = CoNLL2008Format.readNextGraph(sc);
                SparseVector sv = representSentence(sen, g, propBank, nomBank, fn, subjLex, enc);
                if(SET)
                    for(int j = 0; j < sv.index; j++)
                        sv.values[j] = 1.0;
                if(NORMALIZE) {
                    normalize2(sv);
                    /*double sqrlen = SparseVector.sortedSqrLength(sv);
                    if(Math.abs(sqrlen - 1.0) > 1e-12)
                        throw new RuntimeException("!!!");*/
                }
                //System.out.println(sv);
                ts.add(new Pair(sv, intens));
            }
            Random rand = new Random(0);
            int foldSize = ts.size() / nfolds;
            int count = 0;
            int nCorrect = 0;
            int nSubjCorrect = 0;
            PRFStats posstats = new PRFStats();
            PRFStats neustats = new PRFStats();
            PRFStats negstats = new PRFStats();
            for(int i = 0; i < nfolds; i++) {
                System.out.println("Fold " + (i + 1) + ".");
                // Split: fold i is the test set, everything else trains.
                // Vectors are copied so idf weighting never leaks across folds.
                ArrayList> trs = new ArrayList();
                ArrayList> tes = new ArrayList();
                for(int j = 0; j < i*foldSize; j++) {
                    Pair p = ts.get(j);
                    trs.add(new Pair(p.left.copy(), p.right));
                }
                int fend = (i == nfolds - 1)? ts.size(): (i + 1)*foldSize;
                for(int j = i*foldSize; j < fend; j++) {
                    Pair p = ts.get(j);
                    tes.add(new Pair(p.left.copy(), p.right));
                }
                for(int j = fend; j < ts.size(); j++) {
                    Pair p = ts.get(j);
                    trs.add(new Pair(p.left.copy(), p.right));
                }
                if(idf) {
                    // Document frequencies computed on training folds only.
                    IntHistogram dfs = new IntHistogram();
                    for(Pair p: trs)
                        for(int j = 0; j < p.left.index; j++)
                            dfs.add((int) p.left.keys[j]);
                    for(Pair p: trs)
                        weightIDF(p.left, trs.size(), dfs);
                    for(Pair p: tes)
                        weightIDF(p.left, trs.size(), dfs);
                }
                /*if(SET) {
                    for(Pair p: trs)
                        for(int j = 0; j < p.left.index; j++)
                            p.left.values[j] = 1.0;
                    for(Pair p: tes)
                        for(int j = 0; j < p.left.index; j++)
                            p.left.values[j] = 1.0;
                }*/
                // Stage 1 trains on all sentences (SUBJ vs NO_SUBJ); stage 2
                // trains on the subjective subset only (trs is filtered in place).
                ArrayList> trs1 = new ArrayList();
                for(Pair p: trs)
                    trs1.add(new Pair(p.left.copy(), p.right.isSubjective()? SUBJ: NO_SUBJ));
                for(Iterator> it = trs.iterator(); it.hasNext(); )
                    if(!it.next().right.isSubjective())
                        it.remove();
                AlgorithmFactory af = new AlgorithmFactory();
                LearningAlgorithm alg1 = af.create(algName1, algArgs1);
                EncodedMulticlassDefinition def1 = new EncodedMulticlassDefinition(new int[] { SUBJ, NO_SUBJ });
                Classifier cl1 = alg1.train(def1, trs1);
                LearningAlgorithm alg2 = af.create(algName2, algArgs2);
                //IntensitiesSplitter alg2 = new IntensitiesSplitter(algName, algArgs);
                IntensitiesDefinition def2 = new IntensitiesDefinition();
                Classifier cl2 = alg2.train(def2, trs);
                // Evaluate on the held-out fold.
                for(int j = 0; j < tes.size(); j++) {
                    Pair p = tes.get(j);
                    SparseVector sv = p.left;
                    Intensities gold = p.right;
                    count++;
                    Intensities guess;
                    int guess1 = cl1.classify(sv);
                    if(guess1 == NO_SUBJ)
                        guess = new Intensities(NONE, NONE, NONE);
                    else
                        guess = cl2.classify(sv);
                    if(gold.equals(guess)) nCorrect++;
                    // XNOR: correct whenever both agree on subjectivity.
                    if(!(gold.isSubjective() ^ guess.isSubjective())) nSubjCorrect++;
                    final int THRESHOLD = LOW;
                    // Per-polarity stats: "overlap" = both sides at/above the
                    // threshold; "correct" = exact intensity match.
                    if(gold.pos >= THRESHOLD) {
                        posstats.nInGold++;
                        if(guess.pos >= THRESHOLD) posstats.nOverlap++;
                        if(guess.pos == gold.pos) posstats.nCorrect++;
                    }
                    if(guess.pos >= THRESHOLD) posstats.nGuesses++;
                    if(gold.neu >= THRESHOLD) {
                        neustats.nInGold++;
                        if(guess.neu >= THRESHOLD) neustats.nOverlap++;
                        if(guess.neu == gold.neu) neustats.nCorrect++;
                    }
                    if(guess.neu >= THRESHOLD) neustats.nGuesses++;
                    if(gold.neg >= THRESHOLD) {
                        negstats.nInGold++;
                        if(guess.neg >= THRESHOLD) negstats.nOverlap++;
                        if(guess.neg == gold.neg) negstats.nCorrect++;
                    }
                    if(guess.neg >= THRESHOLD) negstats.nGuesses++;
                }
            }
            double acc = (double) nCorrect / count;
            double subjAcc = (double) nSubjCorrect / count;
            System.out.printf("Acc = %d / %d = %f\n", nCorrect, count, acc);
            System.out.printf("Subj acc = %d / %d = %f\n", nSubjCorrect, count, subjAcc);
            System.out.println("Positive:");
            posstats.print();
            System.out.println("Neutral:");
            neustats.print();
            System.out.println("Negative:");
            negstats.print();
            if(!modelName.matches("NONE|NULL|none|null")) {
                // Retrain both stages on the full data set and serialize.
                System.out.println("Training final model: " + modelName);
                if(idf) {
                    IntHistogram dfs = new IntHistogram();
                    for(Pair p: ts)
                        for(int j = 0; j < p.left.index; j++)
                            dfs.add((int) p.left.keys[j]);
                    for(Pair p: ts)
                        weightIDF(p.left, ts.size(), dfs);
                }
                ArrayList> trs1 = new ArrayList();
                for(Pair p: ts)
                    trs1.add(new Pair(p.left.copy(), p.right.isSubjective()? SUBJ: NO_SUBJ));
                for(Iterator> it = ts.iterator(); it.hasNext(); )
                    if(!it.next().right.isSubjective())
                        it.remove();
                AlgorithmFactory af = new AlgorithmFactory();
                LearningAlgorithm alg1 = af.create(algName1, algArgs1);
                EncodedMulticlassDefinition def1 = new EncodedMulticlassDefinition(new int[] { SUBJ, NO_SUBJ });
                Classifier cl1 = alg1.train(def1, trs1);
                LearningAlgorithm alg2 = af.create(algName2, algArgs2);
                //IntensitiesSplitter alg2 = new IntensitiesSplitter(algName, algArgs);
                IntensitiesDefinition def2 = new IntensitiesDefinition();
                Classifier cl2 = alg2.train(def2, ts);
                enc.freeze(); // no new symbols after the model is saved
                ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(modelName));
                oos.writeObject(enc);
                oos.writeObject(cl1);
                oos.writeObject(cl2);
                if(USE_SUBJLEX)
                    oos.writeObject(subjLex);
                oos.close();
            }
        } catch(Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Debug helper: prints every encoded feature's stage-1 weight
     * (score minus bias), sorted descending.
     */
    private static void printWeights(SymbolEncoder enc, Classifier cl) {
        ArrayList encInv = enc.inverse();
        int max = enc.size();
        SparseVector sv = new SparseVector();
        ArrayList range = new ArrayList();
        range.add(SUBJ);
        range.add(NO_SUBJ);
        double[] scores = new double[2];
        // Score of the empty vector = bias term.
        cl.computeScoresRestricted(new SparseVector(), range, scores);
        double bias = scores[0];
        System.out.println("bias = " + bias);
        ArrayList> l = new ArrayList();
        // Probe with a one-hot vector per feature index.
        sv.index = 1;
        sv.values[0] = 1.0;
        for(int i = 1; i < max; i++) {
            sv.keys[0] = i;
            cl.computeScoresRestricted(sv, range, scores);
            String k = encInv.get(i);
            l.add(new DoubleObjPair(scores[0] - bias, k));
            //System.out.println(i + "\t" + scores[0] + "\t" + scores[1]);
        }
        Collections.sort(l, new NegatedComparator(DoubleObjPair.BY_LEFT));
        for(DoubleObjPair p: l)
            System.out.println(p);
    }

    /**
     * Debug helper: re-reads the sentence file and prints each sentence with
     * its classifier score, sorted by score.
     */
    private static void printErrorSentences(String filename, HashSet fpIndices,
            HashSet fnIndices, ArrayList> ts, ArrayList scoreList) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        String line = null;
        int count = 0;
        ArrayList sentences = new ArrayList();
        while(true) {
            count++;
            line = br.readLine();
            if(line == null) break;
            // NOTE(review): text lost between '<' and '>' here (same damage
            // as in train_cv) — preserved verbatim; does not compile.
            if(line.startsWith(" sen = readSentence(br); sentences.add((isSubjective? "Subjective": "Objective") + "\t" + Strings.join(sen, " "));
            //if(fpIndices.contains(count))
            //    System.out.println("FP\t" + sen);
            //else if(fnIndices.contains(count))
            //    System.out.println("FN\t " + sen);
            //else
            //    System.out.println("OK\t " + sen + "\t" + ts.get(count-1));
        }
        Collections.sort(scoreList, DoubleIntPair.BY_LEFT);
        for(DoubleIntPair p: scoreList) {
            System.out.println(p.left + "\t" + sentences.get(p.right - 1));
        }
    }

    /** Looks up the subjectivity-lexicon clue for a single node, or null. */
    private static String getSubjectivityClue(DepNode n, SubjectivityLexicon subjLex) {
        return subjLex.lookup(n.word, n.pos, n.lemma);
    }

    /** Per-node clue array for a graph (index 0 / root left null). */
    private static String[] getSubjectivityClues(DepGraph dg, SubjectivityLexicon subjLex) {
        String[] out = new String[dg.nodes.length];
        for(int i = 1; i < dg.nodes.length; i++)
            out[i] = getSubjectivityClue(dg.nodes[i], subjLex);
        return out;
    }

    /**
     * Destructively replaces each node's word form with its subjectivity
     * clue where one exists.
     */
    private static void modifyTreeSubjNodes(DepGraph dg, SubjectivityLexicon subjLex) {
        for(int i = 1; i < dg.nodes.length; i++) {
            String clue = getSubjectivityClue(dg.nodes[i], subjLex);
            if(clue != null)
                dg.nodes[i].word = clue;
            //out[i] = getSubjectivityClue(dg.nodes[i], subjLex);
        }
    }

    /**
     * Renders the graph's words as a parenthesized bag-of-words string,
     * "(BOW (w1)(w2)...)", with PTB-style bracket escaping and digit
     * normalization; subjectivity clues are wrapped around matching words as
     * (#b_clue)/(#e_clue) markers when USE_SUBJLEX is on.
     */
    private static String bowRepr(DepGraph dg, SubjectivityLexicon subjLex) {
        StringBuilder out = new StringBuilder();
        out.append("(BOW ");
        for(int i = 1; i < dg.nodes.length; i++) {
            String w = dg.nodes[i].word;
            w = w.toLowerCase();
            w = w.replaceAll("[1-9]", "0");
            w = w.replaceAll("\\(", "-LRB-");
            w = w.replaceAll("\\)", "-RRB-");
            w = w.replaceAll("\\[", "-LSB-");
            w = w.replaceAll("\\]", "-RSB-");
            w = w.replaceAll("\\{", "-LCB-");
            w = w.replaceAll("\\}", "-RCB-");
            String slClue = getSubjectivityClue(dg.nodes[i], subjLex);
            if(USE_SUBJLEX)
                if(slClue != null)
                    out.append("(#b_" + slClue + ")");
            out.append("(" + w + ")");
            if(USE_SUBJLEX)
                if(slClue != null)
                    out.append("(#e_" + slClue + ")");
        }
        out.append(")");
        return out.toString();
    }

    /** Bag-of-POS analogue of {@link #bowRepr}: "(BOP (p1)(p2)...)". */
    private static String posRepr(DepGraph dg) {
        StringBuilder out = new StringBuilder();
        out.append("(BOP ");
        for(int i = 1; i < dg.nodes.length; i++) {
            String w = dg.nodes[i].pos;
            w = w.toLowerCase();
            w = w.replaceAll("\\(", "-LRB-");
            w = w.replaceAll("\\)", "-RRB-");
            out.append("(" + w + ")");
        }
        out.append(")");
        return out.toString();
    }

    /**
     * "-dirs" mode: writes each sentence to per-polarity directories
     * (pos/neu/neg) under a train or test output root.
     *
     * argv layout: [1] usePolarity (must be true), [2] multiclass (must be
     * true), [3] sentence file, [4] test-set file list, [5] train output
     * dir, [6] test output dir.
     *
     * NOTE(review): the main loop is damaged by the same lost '&lt;...&gt;'
     * span as above — hasPos/hasNeu/hasNeg and the outDir assignment were in
     * the missing text; outDir is only initialized to null here.
     */
    public static void dirs(String[] argv) {
        boolean usePolarity = Boolean.parseBoolean(argv[1]);
        boolean multiclass = Boolean.parseBoolean(argv[2]);
        if(!usePolarity || !multiclass)
            throw new RuntimeException("illegal setting");
        String fileName = argv[3];
        String testsetFilelist = argv[4];
        String trainOutDir = argv[5];
        String testOutDir = argv[6];
        try {
            new File(trainOutDir).mkdir();
            new File(trainOutDir + "/pos").mkdir();
            new File(trainOutDir + "/neu").mkdir();
            new File(trainOutDir + "/neg").mkdir();
            new File(testOutDir).mkdir();
            new File(testOutDir + "/pos").mkdir();
            new File(testOutDir + "/neu").mkdir();
            new File(testOutDir + "/neg").mkdir();
            HashSet testsetFiles = new HashSet();
            BufferedReader tsbr = new BufferedReader(new FileReader(testsetFilelist));
            String line = tsbr.readLine();
            while(line != null) {
                line = line.trim();
                testsetFiles.add(line);
                line = tsbr.readLine();
            }
            BufferedReader br = new BufferedReader(new FileReader(fileName));
            int count = 0;
            String outDir = null;
            while(true) {
                line = br.readLine();
                if(line == null) break;
                // NOTE(review): lost text — preserved verbatim; does not compile.
                if(line.startsWith(" sen = readSentence(br);
                if(!usePolarity) {
                } else if(!multiclass) {
                } else {
                    if(hasPos)
                        printSentenceToFile(outDir + "/pos/" + count, sen);
                    if(hasNeu)
                        printSentenceToFile(outDir + "/neu/" + count, sen);
                    if(hasNeg)
                        printSentenceToFile(outDir + "/neg/" + count, sen);
                }
            }
        } catch(Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Writes the tokens space-separated on a single line to {@code file}. */
    private static void printSentenceToFile(String file, ArrayList tokens) throws IOException {
        //System.out.println("file = " + file);
        PrintWriter pw = new PrintWriter(new FileWriter(file));
        for(String s: tokens)
            pw.print(s + " ");
        pw.println();
        pw.close();
    }

    /** True if any node in the graph has more than 8 children. */
    private static boolean isStrangeGraph(DepGraph dg) {
        int n = dg.getMaxBranching();
        return n > 8;
    }

    /**
     * Debug helper: prints each token's individual stage-1 weight
     * (one-hot score minus bias).
     */
    private static void printWeightsInSentence(ArrayList sen, SymbolEncoder enc, Classifier cl) {
        SparseVector sv = new SparseVector();
        ArrayList range = new ArrayList();
        range.add(SUBJ);
        range.add(NO_SUBJ);
        double[] scores = new double[2];
        cl.computeScoresRestricted(new SparseVector(), range, scores);
        double bias = scores[0];
        System.out.println("bias = " + bias);
        // NOTE(review): l is built but never used in this method.
        ArrayList> l = new ArrayList();
        sv.index = 1;
        sv.values[0] = 1.0;
        for(String s: sen) {
            sv.keys[0] = enc.encode(s);
            cl.computeScoresRestricted(sv, range, scores);
            System.out.println(s + "\t" + (scores[0] - bias));
        }
    }

    /** Dispatches to {@link #train_cv} ("-cv") or {@link #dirs} ("-dirs"). */
    public static void main(String[] argv) {
        if(argv[0].equals("-cv")) train_cv(argv);
        else if(argv[0].equals("-dirs")) dirs(argv);
        else throw new RuntimeException("illegal parameters");
    }
}