package se.lth.cs.nlp.opinions;

import java.io.Serializable;
import java.util.Arrays;

import se.lth.cs.nlp.nlputils.ml_long.SymbolEncoder;

/**
 * A feature template that packs several small integer feature values into a
 * single {@code long}.
 *
 * <p>A template is written as whitespace-separated components, each of the form
 * {@code I[column][row]:width} (an input feature read from the input matrix at
 * {@code index + row}) or {@code O[row]:width} (an output feature, where
 * {@code row} must be zero or negative). {@code width} is the number of bits
 * reserved for the component; components are laid out from bit 0 upwards in
 * the order they appear in the template string.
 */
public class FeatureTemplate implements Serializable {

    static final long serialVersionUID = 0L;

    /** Value substituted for an input feature whose row lies outside the input matrix. */
    static final int OUTSIDE_LABEL = 1;

    // Positions inside each parsed component, see fs below.
    private static final int COLUMN = 0;
    private static final int ROW = 1;
    private static final int WIDTH = 2;
    private static final int MAX = 3;

    /** Widest component whose maximum value still fits in a non-negative int. */
    private static final int MAX_WIDTH = 31;

    /**
     * Identifier of this template; set externally. It is ORed into every value
     * returned by {@link #encodeFeature}.
     * NOTE(review): presumably the caller places it in bits at or above the
     * {@code maxOffset} passed to the constructor -- confirm against callers.
     */
    public long id;

    // Components of the template are represented as int quadruples
    // [column, row, width, max].
    // The output column has number 0; input column c is stored as c + 1.
    private final int[][] fs;

    // Bit offset of each component inside the encoded long.
    private final long[] offs;

    private final String templateString;

    /**
     * Parses a template string.
     *
     * @param s         whitespace-separated list of components
     * @param maxOffset exclusive upper bound on the total bit width of all components
     * @throws IllegalArgumentException if {@code s} is blank, a component is
     *         malformed, or the total width is not below {@code maxOffset} or
     *         exceeds the 64 bits of a {@code long}
     */
    public FeatureTemplate(String s, int maxOffset) {
        s = s.trim();
        if (s.length() == 0) {
            throw new IllegalArgumentException("empty string");
        }
        templateString = s;

        String[] parts = s.split("\\s+");
        fs = new int[parts.length][];
        offs = new long[parts.length];

        long offset = 0;
        for (int i = 0; i < parts.length; i++) {
            fs[i] = parsePart(parts[i]);
            offs[i] = offset;
            offset += fs[i][WIDTH];
        }

        // Beyond Long.SIZE the shift distance in encodeFeature would wrap
        // around and components would silently overlap.
        if (offset >= maxOffset || offset > Long.SIZE) {
            throw new IllegalArgumentException("too wide");
        }
    }

    /**
     * Parses one component such as {@code I[2][-1]:8} or {@code O[-1]:4}.
     *
     * @return the quadruple {@code [column, row, width, max]}
     * @throws IllegalArgumentException if the component is malformed, its width
     *         is outside {@code 1..31}, an input column is negative, or an
     *         output row is positive
     */
    private int[] parsePart(String s) {
        s = s.trim();
        String[] ss = s.split(":");
        if (ss.length != 2) {
            throw illegalFeature(s);
        }

        int width = Integer.parseInt(ss[1]);
        // (1 << width) wraps for width >= 32 and a non-positive width leaves
        // no room for any value, so reject both up front.
        if (width < 1 || width > MAX_WIDTH) {
            throw illegalFeature(s);
        }

        String spec = ss[0];
        if (spec.length() < 2
                || spec.charAt(1) != '['
                || spec.charAt(spec.length() - 1) != ']') {
            throw illegalFeature(s);
        }

        int[] out = new int[4];
        out[WIDTH] = width;
        out[MAX] = (int) ((1L << width) - 1);

        // Text between the first '[' and the last ']'.
        String inner = spec.substring(2, spec.length() - 1);
        char kind = spec.charAt(0);
        if (kind == 'I') {
            String[] indices = inner.split("\\]\\[");
            if (indices.length != 2) {
                throw illegalFeature(s);
            }
            int column = Integer.parseInt(indices[0]);
            // Column -1 would be stored as 0 and be mistaken for the output column.
            if (column < 0) {
                throw illegalFeature(s);
            }
            out[COLUMN] = column + 1;
            out[ROW] = Integer.parseInt(indices[1]);
        } else if (kind == 'O') {
            // out[COLUMN] stays 0, which marks an output feature.
            out[ROW] = Integer.parseInt(inner);
            if (out[ROW] > 0) {
                throw illegalFeature(s);
            }
        } else {
            throw illegalFeature(s);
        }
        return out;
    }

    /** Builds the exception reported for a malformed component. */
    private static IllegalArgumentException illegalFeature(String s) {
        return new IllegalArgumentException("Illegal feature " + s);
    }

    /**
     * Returns the minimum length the {@code ofs} array passed to
     * {@link #encodeFeature} must have: one more than the largest negated row
     * among the output components, or 0 if the template has no output
     * components.
     */
    public int getRange() {
        int range = 0;
        for (int[] f : fs) {
            if (f[COLUMN] == 0) {
                range = Math.max(range, -f[ROW] + 1);
            }
        }
        return range;
    }

    /**
     * Encodes the feature values selected by this template into a single long.
     *
     * @param ifs   input feature matrix, indexed {@code [row][column]}
     * @param ofs   output feature values; an output component with row
     *              {@code r} reads {@code ofs[-r]}
     * @param index row of {@code ifs} that input row offsets are relative to
     * @return {@link #id} ORed with every component value shifted to its bit offset
     * @throws RuntimeException if a feature value is negative or does not fit
     *         in the width declared for its component
     */
    public long encodeFeature(int[][] ifs, int[] ofs, int index) {
        long out = id;
        for (int i = 0; i < fs.length; i++) {
            int[] part = fs[i];
            int f;
            if (part[COLUMN] == 0) {
                f = ofs[-part[ROW]];
                if (f < 0 || f > part[MAX]) {
                    throw new RuntimeException("feature value too large #1 ("
                            + templateString + ")");
                }
            } else {
                int col = part[COLUMN] - 1;
                int row = index + part[ROW];
                if (row < 0 || row >= ifs.length) {
                    f = OUTSIDE_LABEL;
                } else {
                    f = ifs[row][col];
                    if (f < 0 || f > part[MAX]) {
                        dumpInputs(ifs);
                        throw new RuntimeException("feature value too large #2 ("
                                + f + " / " + part[MAX] + ") ("
                                + templateString + ")");
                    }
                }
            }
            out |= (long) f << offs[i];
        }
        return out;
    }

    /** Prints the input matrix to standard output, one tab-separated row per line. */
    private static void dumpInputs(int[][] ifs) {
        for (int[] row : ifs) {
            for (int value : row) {
                System.out.print(value + "\t");
            }
            System.out.println();
        }
    }

    /**
     * Placeholder: decoding is not implemented and this always returns
     * {@code null}.
     */
    public String featureToString(long l) {
        return null;
    }
}