/* PrefBinarizer.java Created on May 13, 2010 by Richard Johansson (johansson@disi.unitn.it). $Log: PrefBinarizer.java,v $ Revision 1.1 2011-07-11 11:51:37 johansson New experiments. Revision 1.1 2010-06-07 11:30:23 johansson Major update. */ package mpqa_seq_reranker; import java.util.*; import se.lth.cs.nlp.nlputils.core.*; import se.lth.cs.nlp.nlputils.ml_long.*; import se.lth.cs.nlp.nlputils.ml_long.classifiers.*; /** * @author Richard Johansson (richard@cs.lth.se) * @deprecated * TODO merge with normal PrefBinarizer */ public class PrefBinarizer extends LearningAlgorithm { private AlgorithmFactory af = new AlgorithmFactory(); private String[] argv; private int offset; public PrefBinarizer(String[] argv, int offset) { this.argv = argv; this.offset = 0; } private static int LEFT = 1; private static int RIGHT = 2; public Classifier train(ProblemDefinition spec, List trainingSet) { if(!(spec instanceof RerankingDefinition)) throw new RuntimeException("unsupported problem type"); SparseVectorClassEncoder enc = new SparseVectorClassEncoder(); IntPairEncoder penc = new IntPairEncoder(); ArrayList> encoded = new ArrayList(); System.out.println("Binarizing training set, size = " + trainingSet.size()); int count = 0; for(Object o: trainingSet) { Pair p = (Pair) o; NBestRepresentation l = (NBestRepresentation) p.left; Integer r = (Integer) p.right; for(int i = 0; i < l.reps.length; i++) { if(r.equals(i)) continue; SparseVector svCorrLeft = new SparseVector(); enc.encode(l.reps[r], LEFT, svCorrLeft); enc.encode(l.reps[i], RIGHT, svCorrLeft); svCorrLeft.sortIndices(); svCorrLeft.trim(); encoded.add(new Pair(svCorrLeft, 1)); SparseVector svCorrRight = new SparseVector(); enc.encode(l.reps[i], LEFT, svCorrRight); enc.encode(l.reps[r], RIGHT, svCorrRight); svCorrRight.sortIndices(); svCorrRight.trim(); encoded.add(new Pair(svCorrRight, -1)); } count++; if(count % 1000 == 0) { System.out.println("count = " + count + ", enc ts size = "+ encoded.size() + " sv enc size = " + enc.size()); } } penc.freeze(); enc.freeze(); EncodedMulticlassDefinition subSpec = new EncodedMulticlassDefinition(); System.out.println("Training preference-binarized..."); LearningAlgorithm alg = af.create(argv[offset], argv, offset + 1); BinaryNumericClassifier cl = (BinaryNumericClassifier) alg.train(subSpec, encoded); return new PreferenceClassifier(cl, enc); } static class PreferenceClassifier extends Classifier { private static final long serialVersionUID = 0L; private BinaryNumericClassifier cl; private SparseVectorClassEncoder enc; PreferenceClassifier(BinaryNumericClassifier cl, SparseVectorClassEncoder enc) { this.cl = cl; this.enc = enc; } private int apply(NBestRepresentation nb) { int bestValue = -1; double maxScore = Double.NEGATIVE_INFINITY; for(int i = 0; i < nb.reps.length; i++) { double score = 0; SparseVector sv = new SparseVector(); enc.encode(nb.reps[i], LEFT, sv); sv.sortIndices(); score += cl.apply(sv); sv.clear(); enc.encode(nb.reps[i], RIGHT, sv); sv.sortIndices(); score -= cl.apply(sv); if(score > maxScore) { bestValue = i; maxScore = score; } } return bestValue; } public Integer classify(NBestRepresentation nb) { return apply(nb); } } }