package de.uniulm.mathematik.typo.hyphen; import java.io.*; import java.util.*; public class HyphenationWordListLoader { private static String readField(BufferedReader in) throws IOException { StringBuffer buf = new StringBuffer(); in.mark(1); int ch; boolean comment = false; while ((ch = in.read()) >= 0 && ch != ';' && ch != '\n') { if (ch == '#') { comment = true; } if (!comment && ch != ' ') { buf.append((char) ch); } in.mark(1); } in.reset(); if (buf.length() == 0 && ch < 0) { return null; } if (buf.length() == 3 && buf.charAt(0) == '-' && buf.charAt(2) == '-') { return ""; } return buf.toString(); } private static ArrayList readEntry(BufferedReader in) throws IOException { ArrayList fields = new ArrayList(); String field; boolean eol = false; while (!eol && (field = readField(in)) != null) { fields.add(field); int ch = in.read(); if (ch == '\n' || ch < 0) { eol = true; } } if (fields.size() == 0) { return null; } else { return fields; } } private static WeightedHyphenationRule parseRule(String ruleText) { LinkedList points = new LinkedList(); int pos = 0; StringBuffer hyphenation = null; boolean alternatives = false; boolean firstAlternative = true; boolean special = false; boolean firstSpecial = true; for (int i = 0; i < ruleText.length(); ++i) { char ch = ruleText.charAt(i); if (ch == '<' || ch == '>' || ch == '-' || ch == '=' || ch == '.') { if (hyphenation == null) { hyphenation = new StringBuffer(); } hyphenation.append(ch); } else if (ch == '[') { alternatives = true; firstAlternative = true; } else if (ch == ']') { alternatives = false; } else if (ch == '{') { special = true; } else if (ch == '}') { special = false; } else if (ch == '/') { if (special) { firstSpecial = false; } else { firstAlternative = false; } } else if ((!alternatives || firstAlternative) && (!special || firstSpecial)) { if (!alternatives && !special && hyphenation != null) { int weight = 1; char first = hyphenation.charAt(0); int len = hyphenation.length(); char last = hyphenation.charAt(len-1); boolean bad = hyphenation.indexOf(".") != -1; if (bad) { weight = - 4 - len; } else if (first == '=') { weight = len + 4; } else if (first == '<' || last == '>') { weight = len + 3; } else if (first == '-') { weight = 2; } WeightedHyphenationPoint hp = new SingleWeightedHyphenationPoint(pos, weight); points.addFirst(hp); hyphenation = null; } ++pos; } } if (points.size() == 0) { return null; } // now we need to reverse the list of entries WeightedHyphenationEntry rule = null; for (WeightedHyphenationPoint hp: points) { rule = new WeightedHyphenationEntry(hp.getPosition(), hp.getWeight(), rule); } return rule; } public static HyphenationWordList load(BufferedReader in) throws IOException { HyphenationWordList hwl = new HyphenationWordList(); ArrayList fields; while ((fields = readEntry(in)) != null) { if (fields.size() >= 2) { String word = fields.get(0); String rule = fields.get(1); if (rule.length() == 0 && fields.size() >= 3) { /* select traditional hyphenation */ rule = fields.get(2); } if (rule != "") { WeightedHyphenationRule wrule = parseRule(rule); if (wrule != null) { hwl.add(word, wrule); } } } } return hwl; } public static HyphenationWordList load(String filename) throws FileNotFoundException, IOException { return load(new BufferedReader(new FileReader(filename))); } }