/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.Buckwalter;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.treebank.Mapper;
import edu.stanford.nlp.util.Generics;
import java.io.File;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Normalizes lexical items (tokens) that are written either in Arabic script
 * or in the Buckwalter transliteration.
 *
 * <p>{@link #map(String, String)} picks the normalization routine by looking
 * for at least one character in the range U+0600 to U+06FF: if one is found the
 * token is normalized by the UTF-8 routine, otherwise by the Buckwalter routine.
 * Tokens whose parent tag is one of {@code PUNC}, {@code LATIN} or
 * {@code -NONE-} are returned trimmed but otherwise untouched.
 *
 * <p>Behavior can be tuned through {@link #setup(File, String...)}.
 */
public class DefaultLexicalMapper
implements Mapper,
Serializable {
    private static final long serialVersionUID = -3798804368296999785L;

    /** Matches any single character in the range U+0600 to U+06FF; used to detect Arabic-script input. */
    private final Pattern utf8ArabicChart = Pattern.compile("[\u0600-\u06ff]");

    /** Replacement used for every alef variant matched by {@link #bwAlef}. */
    private final String bwAlefChar = "A";
    /** Characters deleted as diacritics from Buckwalter tokens. */
    private final Pattern bwDiacritics = Pattern.compile("F|N|K|a|u|i|\\~|o");
    /** Character deleted as tatweel from Buckwalter tokens longer than one character. */
    private final Pattern bwTatweel = Pattern.compile("_");
    /** Alef variants in Buckwalter tokens that are rewritten to {@link #bwAlefChar}. */
    private final Pattern bwAlef = Pattern.compile("\\{|\\||>|<");
    /** Character deleted from Buckwalter tokens (named as a Quranic mark). */
    private final Pattern bwQuran = Pattern.compile("`");
    /** The literal marker {@code [nll]}, deleted from Buckwalter tokens. */
    private final Pattern bwNullAnaphoraMarker = Pattern.compile("\\[nll\\]");

    /** Matches a run of one or more Latin/general punctuation and symbol characters. */
    public final Pattern latinPunc = Pattern.compile("([!-/:-@\\u005B-`{-~\u00a1-\u00bf\u00f7\u2010-\u2027\u2030-\u205e\u20a0-\u20ba])+");
    /** Matches a run of one or more Arabic punctuation characters. */
    public final Pattern arabicPunc = Pattern.compile("([\u00ab\u00bb\u0609-\u060d\u061b-\u061f\u066a\u066c-\u066d\u06d4])+");
    /** Matches a run of one or more Arabic-script digits. */
    public final Pattern arabicDigit = Pattern.compile("([\u06f0-\u06f9\u0660-\u0669])+");

    /** Characters deleted as diacritics from Arabic-script tokens. */
    private final Pattern utf8Diacritics = Pattern.compile("\u064e|\u064b|\u064f|\u064c|\u0650|\u064d|\u0651|\u0652|\u0670");
    /** Character deleted as tatweel from Arabic-script tokens longer than one character. */
    private final Pattern utf8Tatweel = Pattern.compile("\u0640");
    /** Alef variants in Arabic-script tokens that are rewritten to a single alef form. */
    private final Pattern utf8Alef = Pattern.compile("\u0627|\u0625|\u0623|\u0622|\u0671");
    /** Characters deleted from Arabic-script tokens (named as Quranic marks). */
    private final Pattern utf8Quran = Pattern.compile("[\u0615-\u061a\u06d6-\u06e5]");
    /** Bracketed three-letter marker deleted from Arabic-script tokens. */
    private final Pattern utf8ProDrop = Pattern.compile("\\[\u0646\u0644\u0644\\]");

    /** Matches a run of hyphens at the very start or the very end of a token. */
    public final Pattern segmentationMarker = Pattern.compile("^-+|-+$");
    /** Matches the {@code +} character that is stripped as a morpheme boundary. */
    private final Pattern morphemeBoundary = Pattern.compile("\\+");
    /** Matches a run of one or more digits (as defined by the regex class {@code \d}). */
    private final Pattern hasDigit = Pattern.compile("\\d+");

    /** Enabled by the {@code ATBVocalizedSection} option of {@link #setup(File, String...)}. */
    private boolean useATBVocalizedSectionMapping = false;
    /** Enabled by the {@code StripMorphMarkersInUTF8} option of {@link #setup(File, String...)}. */
    private boolean stripMorphemeMarkersInUTF8 = false;
    /** Enabled by the {@code StripSegMarkersInUTF8} option of {@link #setup(File, String...)}. */
    private boolean stripSegmentationMarkersInUTF8 = false;

    /** Whitespace-separated parent tags whose children are never normalized. */
    private final String parentTagString = "PUNC LATIN -NONE-";
    /** {@link #parentTagString} split into an unmodifiable set. */
    private final Set<String> parentTagsToEscape = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(this.parentTagString.split("\\s+"))));

    /** Whitespace-separated list of Arabic-script clitics. */
    private final String utf8CliticString = "\u0644 \u0641 \u0648 \u0645\u0627 \u0647 \u0647\u0627 \u0647\u0645 \u0647\u0646 \u0646\u0627 \u0643\u0645 \u062a\u0646 \u062a\u0645 \u0649 \u064a \u0647\u0645\u0627 \u0643 \u0628 \u0645";
    /** The clitics of {@link #utf8CliticString} after conversion by {@link Buckwalter}. */
    private final Set<String> bwClitics;

    /**
     * Creates a mapper with all optional behaviors disabled and precomputes
     * the Buckwalter form of the clitic list.
     */
    public DefaultLexicalMapper() {
        // NOTE(review): the boolean flag presumably selects the Unicode-to-Buckwalter
        // direction; confirm against the Buckwalter class.
        Buckwalter bw = new Buckwalter(true);
        String bwString = bw.apply(this.utf8CliticString);
        this.bwClitics = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(bwString.split("\\s+"))));
    }

    /**
     * Normalizes a token that contains Arabic-script characters.
     *
     * @param element the trimmed token
     * @return the token unchanged if it consists solely of Latin or solely of
     *         Arabic punctuation; otherwise the normalized token
     */
    private String mapUtf8(String element) {
        // Pure punctuation tokens pass through untouched.
        if (this.latinPunc.matcher(element).matches() || this.arabicPunc.matcher(element).matches()) {
            return element;
        }

        element = this.utf8Diacritics.matcher(element).replaceAll("");

        // A token that is a lone tatweel (after diacritic removal) is kept as is.
        if (element.length() > 1) {
            element = this.utf8Tatweel.matcher(element).replaceAll("");
        }

        element = this.utf8Alef.matcher(element).replaceAll("\u0627");
        element = this.utf8Quran.matcher(element).replaceAll("");
        element = this.utf8ProDrop.matcher(element).replaceAll("");

        // The optional strips below are only applied when they leave a non-empty token.
        if (this.stripMorphemeMarkersInUTF8) {
            String withoutMorphemeMarkers = this.morphemeBoundary.matcher(element).replaceAll("");
            if (!withoutMorphemeMarkers.isEmpty()) {
                element = withoutMorphemeMarkers;
            }
        }
        if (this.stripSegmentationMarkersInUTF8) {
            String withoutSegmentationMarkers = this.segmentationMarker.matcher(element).replaceAll("");
            if (!withoutSegmentationMarkers.isEmpty()) {
                element = withoutSegmentationMarkers;
            }
        }
        return element;
    }

    /**
     * Normalizes a token that contains no character in the Arabic Unicode
     * block and is therefore treated as Buckwalter-encoded.
     *
     * @param element the trimmed token
     * @return the token unchanged if it consists solely of Latin punctuation;
     *         otherwise the normalized token
     */
    private String mapBuckwalter(String element) {
        // Pure punctuation tokens pass through untouched.
        if (this.latinPunc.matcher(element).matches()) {
            return element;
        }

        element = this.bwDiacritics.matcher(element).replaceAll("");

        // A token that is a lone tatweel (after diacritic removal) is kept as is.
        if (element.length() > 1) {
            element = this.bwTatweel.matcher(element).replaceAll("");
        }

        element = this.bwAlef.matcher(element).replaceAll(this.bwAlefChar);
        element = this.bwQuran.matcher(element).replaceAll("");
        element = this.bwNullAnaphoraMarker.matcher(element).replaceAll("");

        if (this.useATBVocalizedSectionMapping && element.length() > 1) {
            element = this.morphemeBoundary.matcher(element).replaceAll("");
            Matcher cliticMarker = this.segmentationMarker.matcher(element);
            // Leading/trailing hyphens are dropped unless the token contains a digit,
            // would become empty, or would become one of the known clitics.
            if (cliticMarker.find() && !this.hasDigit.matcher(element).find()) {
                String withoutMarkers = cliticMarker.replaceAll("");
                if (!withoutMarkers.isEmpty() && !this.bwClitics.contains(withoutMarkers)) {
                    element = withoutMarkers;
                }
            }
        } else if (element.length() > 1 && !ATBTreeUtils.reservedWords.contains(element)) {
            element = this.segmentationMarker.matcher(element).replaceAll("");
        }
        return element;
    }

    /**
     * Normalizes a single token.
     *
     * @param parent the tag of the token's parent node; may be {@code null},
     *               in which case the token is always normalized
     * @param element the raw token; must not be {@code null}
     * @return the trimmed token if {@code parent} is one of the escaped tags,
     *         otherwise the trimmed and normalized token
     */
    @Override
    public String map(String parent, String element) {
        String elem = element.trim();
        if (parent != null && this.parentTagsToEscape.contains(parent)) {
            return elem;
        }
        boolean hasArabicScript = this.utf8ArabicChart.matcher(elem).find();
        return hasArabicScript ? this.mapUtf8(elem) : this.mapBuckwalter(elem);
    }

    /**
     * Enables optional behaviors. Recognized options are
     * {@code ATBVocalizedSection}, {@code StripSegMarkersInUTF8} and
     * {@code StripMorphMarkersInUTF8}; anything else, including {@code null}
     * entries, is ignored.
     *
     * @param path not used by this implementation
     * @param options option names; may be {@code null}
     */
    @Override
    public void setup(File path, String ... options) {
        if (options == null) {
            return;
        }
        for (String opt : options) {
            // A switch on a null String would throw, so null entries are skipped explicitly.
            if (opt == null) {
                continue;
            }
            switch (opt) {
                case "ATBVocalizedSection":
                    this.useATBVocalizedSectionMapping = true;
                    break;
                case "StripSegMarkersInUTF8":
                    this.stripSegmentationMarkersInUTF8 = true;
                    break;
                case "StripMorphMarkersInUTF8":
                    this.stripMorphemeMarkersInUTF8 = true;
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Tells whether the encoding of a token may be changed.
     *
     * <p>Returns {@code true} unconditionally when the parent tag contains
     * {@code NUMERIC_COMMA}, or contains {@code PUNC} while the token is
     * exactly {@code r}. Otherwise returns {@code false} if the token contains
     * a digit or the parent is one of the escaped tags.
     *
     * @param parent the tag of the token's parent node; may be {@code null},
     *               in which case only the digit check applies (mirroring how
     *               {@link #map(String, String)} treats a {@code null} parent)
     * @param element the token; must not be {@code null}
     * @return whether the token's encoding may be changed
     */
    @Override
    public boolean canChangeEncoding(String parent, String element) {
        element = element.trim();
        if (parent != null) {
            parent = parent.trim();
            if (parent.contains("NUMERIC_COMMA") || (parent.contains("PUNC") && element.equals("r"))) {
                return true;
            }
        }
        if (this.hasDigit.matcher(element).find()) {
            return false;
        }
        return parent == null || !this.parentTagsToEscape.contains(parent);
    }

    /**
     * Small manual check: maps a sample Buckwalter token with no parent tag
     * and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DefaultLexicalMapper m = new DefaultLexicalMapper();
        System.out.printf("< :-> %s\n", m.map(null, "FNKqq"));
    }
}

