/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Character-level transliterator between Arabic-script Unicode text and the
 * ASCII Buckwalter encoding.
 *
 * <p>The mapping is defined by two parallel tables, {@link #arabicChars} and
 * {@link #buckChars}: entry {@code i} of one corresponds to entry {@code i} of
 * the other. Both directions are materialized as hash maps in the constructor.
 * Several characters appear more than once in the tables; because the maps are
 * filled in index order, the <em>last</em> occurrence of a key wins.
 *
 * <p>Input is split on whitespace; tokens listed in
 * {@code ATBTreeUtils.reservedWords} are copied through unchanged, every other
 * token is converted character by character, and tokens are re-joined with a
 * single space.
 *
 * <p>Note that the debug flag and the two "suppress" flags are {@code static}:
 * changing them affects every instance in the JVM.
 */
public class Buckwalter
implements SerializableFunction<String, String> {
    private static final long serialVersionUID = 4351710914246859336L;

    /** When true, mapped characters are emitted as {@code \\uXXXX} escapes instead of literal characters. */
    boolean outputUnicodeValues = false;

    /** Unicode side of the mapping table; index-aligned with {@link #buckChars}. */
    private final char[] arabicChars = new char[]{'\u0621', '\u0622', '\u0623', '\u0624', '\u0625', '\u0626', '\u0627', '\u0628', '\u0629', '\u062a', '\u062b', '\u062c', '\u062d', '\u062e', '\u062f', '\u0630', '\u0631', '\u0632', '\u0633', '\u0634', '\u0635', '\u0636', '\u0637', '\u0638', '\u0639', '\u063a', '\u0640', '\u0641', '\u0642', '\u0643', '\u0644', '\u0645', '\u0646', '\u0647', '\u0648', '\u0649', '\u064a', '\u064b', '\u064c', '\u064d', '\u064e', '\u064f', '\u0650', '\u0651', '\u0652', '\u0670', '\u0671', '\u067e', '\u0686', '\u0698', '\u06a4', '\u06af', '\u0625', '\u0623', '\u0624', '\u060c', '\u061b', '\u061f', '\u066a', '\u066b', '\u06f0', '\u06f1', '\u06f2', '\u06f3', '\u06f4', '\u06f5', '\u06f6', '\u06f7', '\u06f8', '\u06f9', '\u0660', '\u0661', '\u0662', '\u0663', '\u0664', '\u0665', '\u0666', '\u0667', '\u0668', '\u0669', '\u00ab', '\u00bb'};

    /** Buckwalter (ASCII) side of the mapping table; index-aligned with {@link #arabicChars}. */
    private final char[] buckChars = new char[]{'\'', '|', '>', '&', '<', '}', 'A', 'b', 'p', 't', 'v', 'j', 'H', 'x', 'd', '*', 'r', 'z', 's', '$', 'S', 'D', 'T', 'Z', 'E', 'g', '_', 'f', 'q', 'k', 'l', 'm', 'n', 'h', 'w', 'Y', 'y', 'F', 'N', 'K', 'a', 'u', 'i', '~', 'o', '`', '{', 'P', 'J', 'R', 'V', 'G', 'I', 'O', 'W', ',', ';', '?', '%', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '\"', '\"'};

    /** Direction used by {@link #apply(String)}: true = Unicode to Buckwalter, false = Buckwalter to Unicode. */
    private boolean unicode2Buckwalter = false;

    /** Unicode character to Buckwalter character. */
    private final HashMap<Character, Character> u2bMap;

    /** Buckwalter character to Unicode character. */
    private final HashMap<Character, Character> b2uMap;

    /** Counts characters that had no mapping; only allocated when {@link #DEBUG} is set at construction time. */
    private ClassicCounter<String> unmappable;

    private static boolean DEBUG = false;

    /** In Unicode-to-Buckwalter mode, characters below U+007F are passed through unchanged. */
    private static final boolean PASS_ASCII_IN_UNICODE = true;

    /** In Buckwalter-to-Unicode mode, leave digits untouched instead of mapping them. */
    private static boolean SUPPRESS_DIGIT_MAPPING_IN_B2A = true;

    /** In Buckwalter-to-Unicode mode, leave characters matching {@link #latinPunc} untouched. */
    private static boolean SUPPRESS_PUNC_MAPPING_IN_B2A = true;

    // The punctuation characters are listed individually. The previous form
    // "[\"\\?%,-;\\._]+" contained the range ",-;" (U+002C..U+003B), which also
    // matched the digits 0-9 and so made the digit-suppression flag ineffective
    // whenever punctuation suppression was on. The non-digit members of that
    // range (, - . / : ;) are kept so that only digit handling changes.
    private static final Pattern latinPunc = Pattern.compile("[\"?%,\\-./:;_]+");

    private static final StringBuilder usage = new StringBuilder();

    /**
     * Creates a transliterator that converts Buckwalter to Unicode by default.
     *
     * @throws RuntimeException if the two mapping tables differ in length
     */
    public Buckwalter() {
        if (this.arabicChars.length != this.buckChars.length) {
            throw new RuntimeException(this.getClass().getName() + ": Inconsistent u2b/b2u arrays.");
        }
        this.u2bMap = new HashMap<Character, Character>(this.arabicChars.length);
        this.b2uMap = new HashMap<Character, Character>(this.buckChars.length);
        // Filled in index order, so for duplicated keys the later table entry overrides the earlier one.
        for (int i = 0; i < this.arabicChars.length; ++i) {
            Character charU = Character.valueOf(this.arabicChars[i]);
            Character charB = Character.valueOf(this.buckChars[i]);
            this.u2bMap.put(charU, charB);
            this.b2uMap.put(charB, charU);
        }
        if (DEBUG) {
            this.unmappable = new ClassicCounter<String>();
        }
    }

    /**
     * Creates a transliterator with an explicit default direction.
     *
     * @param unicodeToBuckwalter true to make {@link #apply(String)} convert
     *        Unicode to Buckwalter; false for Buckwalter to Unicode
     */
    public Buckwalter(boolean unicodeToBuckwalter) {
        this();
        this.unicode2Buckwalter = unicodeToBuckwalter;
    }

    /**
     * Controls whether digits are left untouched in Buckwalter-to-Unicode mode.
     * The underlying flag is static, so the setting applies to all instances.
     *
     * @param b true to pass digits through unchanged
     */
    public void suppressBuckDigitConversion(boolean b) {
        SUPPRESS_DIGIT_MAPPING_IN_B2A = b;
    }

    /**
     * Controls whether Latin punctuation is left untouched in
     * Buckwalter-to-Unicode mode. The underlying flag is static, so the
     * setting applies to all instances.
     *
     * @param b true to pass punctuation through unchanged
     */
    public void suppressBuckPunctConversion(boolean b) {
        SUPPRESS_PUNC_MAPPING_IN_B2A = b;
    }

    /**
     * Converts {@code in} using the direction chosen at construction time.
     *
     * @param in text to convert; must not be null
     * @return the converted text, whitespace-normalized to single spaces
     */
    @Override
    public String apply(String in) {
        return this.convert(in, this.unicode2Buckwalter);
    }

    /**
     * Converts Buckwalter-encoded text to Unicode.
     *
     * @param in text to convert; must not be null
     * @return the converted text
     */
    public String buckwalterToUnicode(String in) {
        return this.convert(in, false);
    }

    /**
     * Converts Unicode text to the Buckwalter encoding.
     *
     * @param in text to convert; must not be null
     * @return the converted text
     */
    public String unicodeToBuckwalter(String in) {
        return this.convert(in, true);
    }

    /**
     * Tokenizes {@code in} on whitespace, converts each token, and joins the
     * results with single spaces.
     *
     * @param in text to convert
     * @param unicodeToBuckwalter conversion direction
     * @return converted text with leading/trailing whitespace trimmed
     */
    private String convert(String in, boolean unicodeToBuckwalter) {
        StringTokenizer st = new StringTokenizer(in);
        StringBuilder result = new StringBuilder(in.length());
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            // Reserved words are copied verbatim. The check does not depend on the
            // character position, so it is done once per token.
            if (ATBTreeUtils.reservedWords.contains(token)) {
                result.append(token);
            } else {
                for (int i = 0; i < token.length(); ++i) {
                    this.appendConverted(result, token.charAt(i), unicodeToBuckwalter);
                }
            }
            result.append(" ");
        }
        return result.toString().trim();
    }

    /** Converts a single character and appends the result (or the untouched input) to {@code result}. */
    private void appendConverted(StringBuilder result, char inCh, boolean unicodeToBuckwalter) {
        Character outCh = this.mapChar(inCh, unicodeToBuckwalter);
        if (outCh == null) {
            // No mapping: pass the character through. The counter may be absent if
            // DEBUG was switched on after this instance was constructed.
            if (DEBUG && this.unmappable != null) {
                String key = inCh + "[U+" + Buckwalter.hex4(inCh) + ']';
                this.unmappable.incrementCount(key);
            }
            result.append(inCh);
        } else if (this.outputUnicodeValues) {
            // NOTE(review): the escape is built from the *input* character, not the
            // mapped one. Kept as-is; confirm whether outCh was intended.
            result.append("\\u").append(Buckwalter.hex4(inCh));
        } else {
            result.append(outCh);
        }
    }

    /** Looks up the mapping for one character; returns null when the character has no mapping. */
    private Character mapChar(char inCh, boolean unicodeToBuckwalter) {
        Character boxed = Character.valueOf(inCh);
        if (unicodeToBuckwalter) {
            if (PASS_ASCII_IN_UNICODE && inCh < '\u007f') {
                return boxed;
            }
            return this.u2bMap.get(boxed);
        }
        boolean keepDigit = SUPPRESS_DIGIT_MAPPING_IN_B2A && Character.isDigit(inCh);
        if (keepDigit || SUPPRESS_PUNC_MAPPING_IN_B2A && latinPunc.matcher(boxed.toString()).matches()) {
            return boxed;
        }
        return this.b2uMap.get(boxed);
    }

    /** Formats a character's code point as upper-case hexadecimal, left-padded with zeros to four digits. */
    private static String hex4(char c) {
        return StringUtils.padLeft(Integer.toString(c, 16).toUpperCase(), 4, '0');
    }

    /**
     * Command-line entry point. Converts a file (or standard input when no
     * file is given) line by line and writes the result to standard output as
     * UTF-8. See the usage text for the supported options.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        boolean unicodeToBuck = false;
        boolean outputUnicodeValues = false;
        File inputFile = null;
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (!arg.startsWith("-")) {
                // First non-option argument is the input file; stop option parsing.
                inputFile = new File(arg);
                break;
            }
            if (arg.equals("-u2b")) {
                unicodeToBuck = true;
            } else if (arg.equals("-o")) {
                // Previously assigned false, which made the option a no-op.
                outputUnicodeValues = true;
            } else if (arg.equals("-d")) {
                DEBUG = true;
            } else {
                System.out.println(usage.toString());
                return;
            }
        }
        Buckwalter b = new Buckwalter(unicodeToBuck);
        b.outputUnicodeValues = outputUnicodeValues;
        // NOTE(review): with -o, arguments from index 2 onward are converted as
        // literal strings instead of reading the input file. The fixed index 2 is
        // kept from the original; confirm the intended command-line shape.
        int j = b.outputUnicodeValues ? 2 : Integer.MAX_VALUE;
        if (j < args.length) {
            for (; j < args.length; ++j) {
                EncodingPrintWriter.out.println(args[j] + " -> " + b.apply(args[j]), "utf-8");
            }
        } else {
            int numLines = 0;
            BufferedReader br = null;
            try {
                InputStream source = inputFile == null ? System.in : new FileInputStream(inputFile);
                br = new BufferedReader(new InputStreamReader(source, "utf-8"));
                System.err.print("Reading input...");
                String line;
                while ((line = br.readLine()) != null) {
                    EncodingPrintWriter.out.println(b.apply(line), "utf-8");
                    ++numLines;
                }
                System.err.printf("done.\nConverted %d lines from %s.\n", numLines, unicodeToBuck ? "UTF-8 to Buckwalter" : "Buckwalter to UTF-8");
            }
            catch (UnsupportedEncodingException e) {
                System.err.println("ERROR: File system does not support UTF-8 encoding.");
            }
            catch (FileNotFoundException e) {
                System.err.println("ERROR: File does not exist: " + inputFile.getPath());
            }
            catch (IOException e) {
                System.err.printf("ERROR: IO exception while reading file (line %d).\n", numLines);
            }
            finally {
                // Close the reader on every path, including read failures.
                if (br != null) {
                    try {
                        br.close();
                    }
                    catch (IOException ignored) {
                        // Nothing useful can be done if close fails at this point.
                    }
                }
            }
        }
        if (DEBUG && b.unmappable != null) {
            if (!b.unmappable.keySet().isEmpty()) {
                EncodingPrintWriter.err.println("Characters that could not be converted [passed through!]:", "utf-8");
                EncodingPrintWriter.err.println(b.unmappable.toString(), "utf-8");
            } else {
                EncodingPrintWriter.err.println("All characters successfully converted!", "utf-8");
            }
        }
    }

    static {
        usage.append("Usage: java Buckwalter [OPTS] file   (or < file)\n");
        usage.append("Options:\n");
        usage.append("          -u2b : Unicode -> Buckwalter (default is Buckwalter -> Unicode).\n");
        usage.append("          -d   : Debug mode.\n");
        usage.append("          -o   : Output unicode values.\n");
    }
}

