/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.analysis.ko;

import java.io.IOException;
import java.util.EnumMap;
import org.apache.lucene.analysis.ko.DecompoundToken;
import org.apache.lucene.analysis.ko.DictionaryToken;
import org.apache.lucene.analysis.ko.KoreanTokenizer;
import org.apache.lucene.analysis.ko.POS;
import org.apache.lucene.analysis.ko.Token;
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
import org.apache.lucene.analysis.ko.dict.KoMorphData;
import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
import org.apache.lucene.analysis.ko.dict.UserDictionary;
import org.apache.lucene.analysis.morph.BinaryDictionary;
import org.apache.lucene.analysis.morph.ConnectionCosts;
import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.analysis.morph.GraphvizFormatter;
import org.apache.lucene.analysis.morph.MorphData;
import org.apache.lucene.analysis.morph.TokenInfoFST;
import org.apache.lucene.analysis.morph.TokenType;
import org.apache.lucene.analysis.morph.Viterbi;
import org.apache.lucene.util.fst.FST;

/**
 * Korean specialization of the generic morphological Viterbi decoder.
 *
 * <p>This class contributes three things to the base algorithm: it proposes unknown-word
 * candidates ({@link #processUnknownWord}), it converts the best path into pending tokens while
 * backtracing, optionally splitting compounds according to the configured
 * {@link KoreanTokenizer.DecompoundMode} ({@link #backtrace}), and it charges a penalty when
 * certain part-of-speech tags follow skipped spaces ({@link #computeSpacePenalty}).
 */
final class Viterbi
extends org.apache.lucene.analysis.morph.Viterbi<Token, Viterbi.Position> {
    /** Upper bound on the number of chars grouped into a single unknown word. */
    private static final int UNKNOWN_WORD_LENGTH_LIMIT = 1024;

    /** Penalty returned by {@link #computeSpacePenalty} for the tags it lists. */
    private static final int SPACE_PENALTY = 3000;

    /** U+318D (HANGUL LETTER ARAEA); always classified as punctuation by this class. */
    private static final char HANGUL_LETTER_ARAEA = '\u318d';

    /** Dictionary to consult for each token type; filled once in the constructor. */
    private final EnumMap<TokenType, Dictionary<? extends KoMorphData>> dictionaryMap = new EnumMap<>(TokenType.class);
    private final UnknownDictionary unkDictionary;
    private final CharacterDefinition characterDefinition;
    private final boolean discardPunctuation;
    private final KoreanTokenizer.DecompoundMode mode;
    private final boolean outputUnknownUnigrams;
    /** Optional debug sink notified at the start of every backtrace; {@code null} disables it. */
    private GraphvizFormatter<KoMorphData> dotOut;

    /**
     * Creates the decoder and registers the known, unknown and user dictionaries by token type.
     *
     * <p>Besides storing its arguments, the constructor switches on the
     * {@code enableSpacePenaltyFactor} and {@code outputLongestUserEntryOnly} flags, which are not
     * declared in this class (they come from the superclass).
     *
     * @param discardPunctuation when {@code true}, punctuation tokens are filtered out and no
     *     token is emitted for the gap between {@code backPos} and {@code backWordPos}
     * @param mode controls whether and how non-morpheme tokens are split during backtrace
     * @param outputUnknownUnigrams when {@code true}, unknown words are emitted one code point at
     *     a time instead of as a single token
     */
    Viterbi(TokenInfoFST fst, FST.BytesReader fstReader, TokenInfoDictionary dictionary, TokenInfoFST userFST, FST.BytesReader userFSTReader, UserDictionary userDictionary, ConnectionCosts costs, UnknownDictionary unkDictionary, CharacterDefinition characterDefinition, boolean discardPunctuation, KoreanTokenizer.DecompoundMode mode, boolean outputUnknownUnigrams) {
        super(fst, fstReader, (BinaryDictionary)dictionary, userFST, userFSTReader, (Dictionary)userDictionary, costs, Viterbi.Position.class);
        this.unkDictionary = unkDictionary;
        this.characterDefinition = characterDefinition;
        this.discardPunctuation = discardPunctuation;
        this.mode = mode;
        this.outputUnknownUnigrams = outputUnknownUnigrams;
        this.enableSpacePenaltyFactor = true;
        this.outputLongestUserEntryOnly = true;
        this.dictionaryMap.put(TokenType.KNOWN, (Dictionary<? extends KoMorphData>)dictionary);
        this.dictionaryMap.put(TokenType.UNKNOWN, (Dictionary<? extends KoMorphData>)unkDictionary);
        this.dictionaryMap.put(TokenType.USER, userDictionary);
    }

    /**
     * Adds unknown-word candidates starting at the current position.
     *
     * <p>Nothing is added when a dictionary match already exists and the character definition
     * does not request unknown-word processing ("invoke") for the first character. Otherwise the
     * candidate is a single char, or, for characters flagged as "group", a run of following
     * chars that share script, punctuation-ness and digit-ness with the first one.
     *
     * @param anyMatches whether a dictionary match was already found at this position
     * @param posData lattice position the candidates are attached to
     * @return always {@code 0}
     * @throws IOException if reading from the character buffer fails
     */
    @Override
    protected int processUnknownWord(boolean anyMatches, Viterbi.Position posData) throws IOException {
        final char firstCharacter = (char) this.buffer.get(this.pos);
        if (anyMatches && !this.characterDefinition.isInvoke(firstCharacter)) {
            return 0;
        }

        byte characterId = this.characterDefinition.getCharacterClass(firstCharacter);
        int unknownWordLength = 1;
        if (this.characterDefinition.isGroup(firstCharacter)) {
            Character.UnicodeScript scriptCode = Character.UnicodeScript.of(firstCharacter);
            final boolean isPunct = isPunctuation(firstCharacter);
            final boolean isDigit = Character.isDigit(firstCharacter);
            for (int posAhead = this.pos + 1; unknownWordLength < UNKNOWN_WORD_LENGTH_LIMIT; ++posAhead) {
                final int next = this.buffer.get(posAhead);
                if (next == -1) {
                    // The buffer has no char at this position: the run ends here.
                    break;
                }
                final char ch = (char) next;
                final int chType = Character.getType(ch);
                final Character.UnicodeScript sc = Character.UnicodeScript.of(next);
                // A non-spacing mark never breaks the run on script grounds.
                final boolean sameScript = isSameScript(scriptCode, sc) || chType == Character.NON_SPACING_MARK;
                if (!sameScript
                        || isPunctuation(ch, chType) != isPunct
                        || Character.isDigit(ch) != isDigit
                        || !this.characterDefinition.isGroup(ch)) {
                    break;
                }
                ++unknownWordLength;
                // A run that started on a Common/Inherited char adopts the script and character
                // class of the first char that has a concrete script.
                if (isCommonOrInherited(scriptCode) && !isCommonOrInherited(sc)) {
                    scriptCode = sc;
                    characterId = this.characterDefinition.getCharacterClass(ch);
                }
            }
        }

        this.unkDictionary.lookupWordIds(characterId, this.wordIdRef);
        for (int ofs = 0; ofs < this.wordIdRef.length; ++ofs) {
            final int wordId = this.wordIdRef.ints[this.wordIdRef.offset + ofs];
            this.add(this.unkDictionary.getMorphAttributes(), posData, this.pos, this.pos + unknownWordLength, wordId, TokenType.UNKNOWN, false);
        }
        return 0;
    }

    /** Installs (or, with {@code null}, removes) the formatter notified on every backtrace. */
    void setGraphvizFormatter(GraphvizFormatter<KoMorphData> dotOut) {
        this.dotOut = dotOut;
    }

    /**
     * Walks the best path backwards from {@code endPosData} to the previous backtrace position
     * and appends the resulting tokens to {@code pending}, last token first.
     *
     * <p>Once the path is consumed, the buffer and the lattice positions before the end position
     * are released and {@code lastBackTracePos} is advanced.
     *
     * @param endPosData position the backtrace starts from
     * @param fromIDX index of the path to follow at {@code endPosData}
     */
    @Override
    protected void backtrace(Viterbi.Position endPosData, int fromIDX) {
        final int endPos = endPosData.getPos();
        if (endPos == this.lastBackTracePos) {
            // Nothing new since the previous backtrace.
            return;
        }

        final char[] fragment = this.buffer.get(this.lastBackTracePos, endPos - this.lastBackTracePos);
        if (this.dotOut != null) {
            this.dotOut.onBacktrace(this::getDict, this.positions, this.lastBackTracePos, endPosData, fromIDX, fragment, this.end);
        }

        int pos = endPos;
        int bestIDX = fromIDX;
        while (pos > this.lastBackTracePos) {
            final Viterbi.Position posData = this.positions.get(pos);
            assert (bestIDX < posData.getCount());
            final int backPos = posData.getBackPos(bestIDX);
            final int backWordPos = posData.getBackWordPos(bestIDX);
            assert (backPos >= this.lastBackTracePos) : "backPos=" + backPos + " vs lastBackTracePos=" + this.lastBackTracePos;
            // The word covers [backWordPos, pos); [backPos, backWordPos) is the gap before it.
            final int length = pos - backWordPos;
            final TokenType backType = posData.getBackType(bestIDX);
            final int backID = posData.getBackID(bestIDX);
            final int nextBestIDX = posData.getBackIndex(bestIDX);
            final int fragmentOffset = backWordPos - this.lastBackTracePos;
            assert (fragmentOffset >= 0);

            if (this.outputUnknownUnigrams && backType == TokenType.UNKNOWN) {
                this.addUnknownUnigrams(fragment, fragmentOffset, length, backWordPos);
            } else {
                final Dictionary<? extends KoMorphData> dict = this.getDict(backType);
                final DictionaryToken token = new DictionaryToken(backType, (KoMorphData)dict.getMorphAttributes(), backID, fragment, fragmentOffset, length, backWordPos, backWordPos + length);
                this.addWordTokens(token, backType, backWordPos + length);
            }

            if (!this.discardPunctuation && backWordPos != backPos) {
                this.addGapToken(fragment, backPos, backWordPos);
            }

            pos = backPos;
            bestIDX = nextBestIDX;
        }

        this.lastBackTracePos = endPos;
        this.buffer.freeBefore(endPos);
        this.positions.freeBefore(endPos);
    }

    /**
     * Emits an unknown word as one NGRAM token per code point, walking from its end to its start.
     * A low surrogate that is not the first char of the word is joined with the char before it
     * into a single two-char token.
     */
    private void addUnknownUnigrams(char[] fragment, int fragmentOffset, int length, int backWordPos) {
        for (int i = length - 1; i >= 0; --i) {
            int charLen = 1;
            if (i > 0 && Character.isLowSurrogate(fragment[fragmentOffset + i])) {
                --i;
                charLen = 2;
            }
            final DictionaryToken unigram = new DictionaryToken(TokenType.UNKNOWN, this.unkDictionary.getMorphAttributes(), CharacterDefinition.NGRAM, fragment, fragmentOffset + i, charLen, backWordPos + i, backWordPos + i + charLen);
            this.pending.add(unigram);
        }
    }

    /**
     * Emits {@code token} itself, its morphemes, or both, depending on its POS type and the
     * decompound mode.
     *
     * @param token the dictionary token for the word
     * @param backType token type recorded on the path, propagated to the decompounded parts
     * @param wordEnd end offset of the word, used to place COMPOUND parts right-to-left
     */
    private void addWordTokens(DictionaryToken token, TokenType backType, int wordEnd) {
        if (token.getPOSType() == POS.Type.MORPHEME || this.mode == KoreanTokenizer.DecompoundMode.NONE) {
            // Emitted whole; this is the only path on which the punctuation filter applies.
            if (!this.shouldFilterToken(token)) {
                this.pending.add(token);
            }
            return;
        }

        final KoMorphData.Morpheme[] morphemes = token.getMorphemes();
        if (morphemes == null) {
            this.pending.add(token);
            return;
        }

        int endOffset = wordEnd;
        int posLen = 0;
        for (int i = morphemes.length - 1; i >= 0; --i) {
            final KoMorphData.Morpheme morpheme = morphemes[i];
            final DecompoundToken compoundToken;
            if (token.getPOSType() == POS.Type.COMPOUND) {
                // COMPOUND parts get their own offsets, carved from the end of the word.
                assert (endOffset - morpheme.surfaceForm().length() >= 0);
                compoundToken = new DecompoundToken(morpheme.posTag(), morpheme.surfaceForm(), endOffset - morpheme.surfaceForm().length(), endOffset, backType);
            } else {
                // Every other type reuses the offsets of the whole token for each part.
                compoundToken = new DecompoundToken(morpheme.posTag(), morpheme.surfaceForm(), token.getStartOffset(), token.getEndOffset(), backType);
            }
            if (i == 0 && this.mode == KoreanTokenizer.DecompoundMode.MIXED) {
                // In MIXED mode the first part shares its position with the whole token.
                compoundToken.setPositionIncrement(0);
            }
            ++posLen;
            endOffset -= morpheme.surfaceForm().length();
            this.pending.add(compoundToken);
        }

        if (this.mode == KoreanTokenizer.DecompoundMode.MIXED) {
            // MIXED also keeps the original token, spanning all of its parts.
            token.setPositionLength(Math.max(1, posLen));
            this.pending.add(token);
        }
    }

    /**
     * Emits an UNKNOWN token for the chars between {@code backPos} and {@code backWordPos}, using
     * the first word id the unknown dictionary returns for the character class of {@code ' '}.
     * NOTE(review): the gap is presumably the whitespace skipped before the word - confirm.
     */
    private void addGapToken(char[] fragment, int backPos, int backWordPos) {
        final int offset = backPos - this.lastBackTracePos;
        final int len = backWordPos - backPos;
        this.unkDictionary.lookupWordIds(this.characterDefinition.getCharacterClass(' '), this.wordIdRef);
        final DictionaryToken gapToken = new DictionaryToken(TokenType.UNKNOWN, this.unkDictionary.getMorphAttributes(), this.wordIdRef.ints[this.wordIdRef.offset], fragment, offset, len, backPos, backPos + len);
        this.pending.add(gapToken);
    }

    /**
     * Returns the penalty for a word whose left POS tag is one of the tags listed below when
     * {@code numSpaces} is positive.
     *
     * @param morphData morphological data; must be a {@link KoMorphData}
     * @param wordID id of the word whose left POS tag is inspected
     * @param numSpaces the space count supplied by the caller
     * @return {@code 3000} if {@code numSpaces > 0} and the tag is listed, otherwise {@code 0}
     */
    @Override
    protected int computeSpacePenalty(MorphData morphData, int wordID, int numSpaces) {
        final POS.Tag leftPOS = ((KoMorphData)morphData).getLeftPOS(wordID);
        if (numSpaces <= 0) {
            return 0;
        }
        switch (leftPOS) {
            case EP:
            case EF:
            case EC:
            case ETN:
            case ETM:
            case JKS:
            case JKC:
            case JKG:
            case JKO:
            case JKB:
            case JKV:
            case JKQ:
            case JX:
            case JC:
            case VCP:
            case XSA:
            case XSN:
            case XSV:
                return SPACE_PENALTY;
            default:
                return 0;
        }
    }

    /** Returns the dictionary registered for {@code type}, or {@code null} if there is none. */
    Dictionary<? extends KoMorphData> getDict(TokenType type) {
        return this.dictionaryMap.get(type);
    }

    /** True when punctuation is being discarded and the token's first char is punctuation. */
    private boolean shouldFilterToken(Token token) {
        return this.discardPunctuation && isPunctuation(token.getSurfaceForm()[token.getOffset()]);
    }

    /** Classifies {@code ch} using its own Unicode general category. */
    private static boolean isPunctuation(char ch) {
        return isPunctuation(ch, Character.getType(ch));
    }

    /**
     * Reports whether a char counts as punctuation for segmentation purposes.
     *
     * <p>Besides the Unicode punctuation categories this includes separators, control and format
     * characters, and symbols, plus U+318D as a special case.
     *
     * @param ch the char to classify
     * @param cid the general category of {@code ch} as returned by {@link Character#getType(char)}
     */
    private static boolean isPunctuation(char ch, int cid) {
        if (ch == HANGUL_LETTER_ARAEA) {
            return true;
        }
        switch (cid) {
            case Character.SPACE_SEPARATOR:
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR:
            case Character.CONTROL:
            case Character.FORMAT:
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.CONNECTOR_PUNCTUATION:
            case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
            case Character.OTHER_SYMBOL:
            case Character.INITIAL_QUOTE_PUNCTUATION:
            case Character.FINAL_QUOTE_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }

    /** True for the two scripts that carry no script identity of their own. */
    private static boolean isCommonOrInherited(Character.UnicodeScript script) {
        return script == Character.UnicodeScript.INHERITED || script == Character.UnicodeScript.COMMON;
    }

    /** Two scripts are compatible when they are equal or either one is Common/Inherited. */
    private static boolean isSameScript(Character.UnicodeScript scriptOne, Character.UnicodeScript scriptTwo) {
        return scriptOne == scriptTwo || isCommonOrInherited(scriptOne) || isCommonOrInherited(scriptTwo);
    }
}

