/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.french.process;

import edu.stanford.nlp.international.french.process.FrenchLexer;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

public class FrenchTokenizer<T extends HasWord>
extends AbstractTokenizer<T> {
    /** Logging channel for this class; used by {@code main} for the help text and for encoding errors. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(FrenchTokenizer.class);
    /** Lexer that {@code getNext()} pulls raw tokens from; created in the constructor. */
    private final FrenchLexer lexer;
    /** Option name that {@code FrenchTokenizerFactory.setOptions} maps to the compound-splitting flag. */
    private static final String SPLIT_COMPOUNDS_OPTION = "splitCompounds";
    /** Option name that {@code FrenchTokenizerFactory.setOptions} maps to the contraction-splitting flag. */
    private static final String SPLIT_CONTRACTIONS_OPTION = "splitContractions";
    /** When true, tokens whose ParentAnnotation is "comp" are split on hyphens. */
    private final boolean splitCompounds;
    /** When true, tokens whose ParentAnnotation is "contraction" ("au", "aux", "du") are split in two. */
    private final boolean splitContractions;
    /**
     * Queue of tokens produced by a split and not yet returned by {@code getNext()}.
     * Only allocated when at least one of the split flags is set; otherwise it stays null.
     */
    private List<CoreLabel> compoundBuffer;
    /** Option string applied by {@code ftbFactory()} on top of the default factory. */
    public static final String FTB_OPTIONS = "ellipses=ptb3,normalizeParentheses=true,ptb3Dashes=false,splitContractions=true,splitCompounds=true";
    /** Option string used by {@code FrenchTokenizerFactory.newTokenizerFactory()}. */
    public static final String DEFAULT_OPTIONS = "invertible,splitCompounds=false,splitContractions=false,quotes=ORIGINAL";

    public FrenchTokenizer(Reader r, LexedTokenFactory<T> tf, Properties lexerProperties, boolean splitCompounds, boolean splitContractions) {
        this.lexer = new FrenchLexer(r, tf, lexerProperties);
        this.splitCompounds = splitCompounds;
        this.splitContractions = splitContractions;
        if (splitCompounds || splitContractions) {
            this.compoundBuffer = Generics.newLinkedList();
        }
    }

    /**
     * Returns the next token, or {@code null} when the lexer has nothing more to give.
     *
     * <p>Tokens left over from an earlier split are served first. Tokens with an empty
     * word are skipped. A token whose ParentAnnotation is "comp" or "contraction" is
     * split (when the matching flag is enabled); the first piece is returned and the
     * remaining pieces are queued for later calls.
     *
     * @throws RuntimeIOException if the underlying lexer fails with an {@link IOException}
     */
    @Override
    @SuppressWarnings("unchecked") // the token factory given to the lexer is expected to produce T
    protected T getNext() {
        try {
            final boolean splittingEnabled = this.splitContractions || this.splitCompounds;
            HasWord token;
            do {
                if (splittingEnabled && !this.compoundBuffer.isEmpty()) {
                    token = this.compoundBuffer.remove(0);
                } else {
                    token = (HasWord) this.lexer.next();
                }
            } while (token != null && token.word().length() == 0);

            if (this.splitCompounds && FrenchTokenizer.hasParentMarker(token, "comp")) {
                token = this.processCompound((CoreLabel) token);
            }
            if (this.splitContractions && FrenchTokenizer.hasParentMarker(token, "contraction")) {
                token = this.processContraction((CoreLabel) token);
            }
            return (T) token;
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /** Tells whether {@code token} is a CoreLabel whose ParentAnnotation equals {@code marker}. */
    private static boolean hasParentMarker(HasWord token, String marker) {
        if (!(token instanceof CoreLabel)) {
            return false;
        }
        final CoreLabel label = (CoreLabel) token;
        return label.containsKey(CoreAnnotations.ParentAnnotation.class)
                && label.get(CoreAnnotations.ParentAnnotation.class).equals(marker);
    }

    /**
     * Builds a new label from {@code cl} (via the CoreLabel copy constructor) and then
     * overwrites its word, value, original text and begin/end positions.
     *
     * @param cl label to copy from; not modified here
     * @param part text stored as word, value and original text of the copy
     * @param beginPosition begin position stored on the copy
     * @param endPosition end position stored on the copy
     * @return the newly created label
     */
    private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition, int endPosition) {
        final CoreLabel copy = new CoreLabel(cl);
        copy.setWord(part);
        copy.setValue(part);
        copy.setBeginPosition(beginPosition);
        copy.setEndPosition(endPosition);
        copy.set(CoreAnnotations.OriginalTextAnnotation.class, part);
        return copy;
    }

    /**
     * Convenience overload: the end position is taken to be
     * {@code beginPosition + part.length()}.
     *
     * @param cl label to copy from
     * @param part text for the copy
     * @param beginPosition begin position stored on the copy
     * @return the newly created label
     */
    private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition) {
        final int endPosition = beginPosition + part.length();
        return FrenchTokenizer.copyCoreLabel(cl, part, beginPosition, endPosition);
    }

    /**
     * Splits a compound token on hyphens. Every hyphen becomes a token of its own, so
     * "a-b" yields "a", "-", "b". The first piece is returned and the others are
     * appended to {@code compoundBuffer} for later calls of {@code getNext()}.
     *
     * <p>Each piece is a copy of {@code cl} with only word, value and original text
     * replaced; begin/end positions are NOT recomputed per piece.
     *
     * <p>Empty pieces are dropped. Without that, a word starting with a hyphen would
     * yield a leading empty-word token, because {@code String.split} keeps leading
     * empty strings. If the word yields no piece at all, {@code cl} itself is returned
     * (with its ParentAnnotation removed) instead of failing on an empty buffer.
     *
     * @param cl the compound token; its ParentAnnotation is removed as a side effect
     * @return the first piece of the compound
     */
    private CoreLabel processCompound(CoreLabel cl) {
        // Drop the marker first so that the copies made below do not carry it.
        cl.remove(CoreAnnotations.ParentAnnotation.class);

        // Literal replace: "-" needs no regex treatment.
        final String[] parts = cl.word().replace("-", " - ").split("\\s+");
        int queued = 0;
        for (String part : parts) {
            if (part.isEmpty()) {
                continue;
            }
            CoreLabel piece = new CoreLabel(cl);
            piece.setWord(part);
            piece.setValue(part);
            piece.set(CoreAnnotations.OriginalTextAnnotation.class, part);
            this.compoundBuffer.add(piece);
            ++queued;
        }
        if (queued == 0) {
            return cl;
        }
        return this.compoundBuffer.remove(0);
    }

    /**
     * Splits one of the contractions "au", "aux" or "du" (matched case-insensitively)
     * into its two underlying words: "au" gives "\u00e0" + "le", "aux" gives
     * "\u00e0" + "les", "du" gives "de" + "le".
     *
     * <p>The first word is returned with the span [begin, begin + 1); the second word is
     * appended to {@code compoundBuffer} with a span covering the rest of the contraction.
     *
     * @param cl the contraction token; its ParentAnnotation is removed as a side effect
     * @return the label for the first word
     * @throws IllegalArgumentException if the word is not one of the three contractions
     */
    private CoreLabel processContraction(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        final String contraction = cl.word().toLowerCase();

        final String head;
        final String tail;
        final int tailOffset;
        final int tailLength;
        if ("au".equals(contraction)) {
            head = "\u00e0";
            tail = "le";
            tailOffset = 1;
            tailLength = 1;
        } else if ("aux".equals(contraction)) {
            head = "\u00e0";
            tail = "les";
            tailOffset = 1;
            tailLength = 2;
        } else if ("du".equals(contraction)) {
            head = "de";
            tail = "le";
            tailOffset = 1;
            tailLength = 1;
        } else {
            throw new IllegalArgumentException("Invalid contraction provided to processContraction");
        }

        final int tailBegin = cl.beginPosition() + tailOffset;
        final int tailEnd = tailBegin + tailLength;
        // Queue the second word; it is handed out by the next getNext() call.
        this.compoundBuffer.add(FrenchTokenizer.copyCoreLabel(cl, tail, tailBegin, tailEnd));
        return FrenchTokenizer.copyCoreLabel(cl, head, cl.beginPosition(), tailBegin);
    }

    /**
     * Returns a CoreLabel tokenizer factory as produced by
     * {@code FrenchTokenizerFactory.newTokenizerFactory()}.
     *
     * @return a new factory instance
     */
    public static TokenizerFactory<CoreLabel> factory() {
        final TokenizerFactory<CoreLabel> defaultFactory = FrenchTokenizerFactory.newTokenizerFactory();
        return defaultFactory;
    }

    /**
     * Returns a tokenizer factory that builds tokens with the given token factory and
     * is configured from the given option string.
     *
     * @param factory token factory used by the tokenizers this factory creates
     * @param options comma-separated options, passed to {@code FrenchTokenizerFactory.setOptions}
     * @param <T> token type produced
     * @return a new factory instance
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory, String options) {
        // Parameterized instantiation: the raw type would force an unchecked conversion here.
        return new FrenchTokenizerFactory<T>(factory, options);
    }

    /**
     * Returns a default CoreLabel tokenizer factory with {@link #FTB_OPTIONS} applied
     * on top of it.
     *
     * @return a new factory instance
     */
    public static TokenizerFactory<CoreLabel> ftbFactory() {
        final TokenizerFactory<CoreLabel> ftb = FrenchTokenizerFactory.newTokenizerFactory();
        ftb.setOptions(FrenchTokenizer.FTB_OPTIONS);
        return ftb;
    }

    /**
     * Builds the command-line help text shown for {@code -help}.
     *
     * @return the usage line followed by one line per supported option
     */
    private static String usage() {
        final String eol = System.getProperty("line.separator");
        final String[] optionLines = {
            "Options:",
            "   -help          : Print this message.",
            "   -ftb           : Tokenization for experiments in Green et al. (2011).",
            "   -lowerCase     : Apply lowercasing.",
            "   -encoding type : Encoding format.",
            "   -options str   : Orthographic options (see FrenchLexer.java)",
        };

        final StringBuilder text = new StringBuilder();
        text.append(String.format("Usage: java %s [OPTIONS] < file%n%n", FrenchTokenizer.class.getName()));
        for (String line : optionLines) {
            text.append(line).append(eol);
        }
        return text.toString();
    }

    /**
     * Describes the command-line options understood by {@code main}: each option name
     * is mapped to the number of arguments that follow it.
     *
     * @return a new mutable map from option name to argument count
     */
    private static Map<String, Integer> argOptionDefs() {
        final Map<String, Integer> arity = Generics.newHashMap();
        // Plain switches take no argument.
        for (String flag : new String[] {"help", "ftb", "lowerCase"}) {
            arity.put(flag, 0);
        }
        // These options are followed by exactly one value.
        for (String valued : new String[] {"encoding", "options"}) {
            arity.put(valued, 1);
        }
        return arity;
    }

    /**
     * Command-line entry point: tokenizes standard input and writes the tokens to
     * standard output, separated by single spaces, starting a new output line for
     * every "*NL*" token. A summary (lines, tokens, lines per second) goes to
     * standard error. See {@code usage()} for the supported options.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        final Properties cli = StringUtils.argsToProperties(args, FrenchTokenizer.argOptionDefs());
        if (cli.containsKey("help")) {
            log.info(FrenchTokenizer.usage());
            return;
        }

        final TokenizerFactory<CoreLabel> tokenizerFactory;
        if (cli.containsKey("ftb")) {
            tokenizerFactory = FrenchTokenizer.ftbFactory();
        } else {
            tokenizerFactory = FrenchTokenizer.factory();
        }

        // Newlines are always tokenized here so that the output keeps the input's line structure.
        final String userOptions = cli.getProperty("options", "");
        final String lexerOptions = userOptions.isEmpty() ? "tokenizeNLs" : userOptions + ",tokenizeNLs";
        tokenizerFactory.setOptions(lexerOptions);

        final String encoding = cli.getProperty("encoding", "UTF-8");
        final boolean toLower = PropertiesUtils.getBool(cli, "lowerCase", false);

        int lineCount = 0;
        int tokenCount = 0;
        final long startNanos = System.nanoTime();
        try {
            final Tokenizer<CoreLabel> tokens = tokenizerFactory.getTokenizer(new InputStreamReader(System.in, encoding));
            boolean needSpace = false;
            while (tokens.hasNext()) {
                ++tokenCount;
                final String word = tokens.next().word();
                if (word.equals("*NL*")) {
                    ++lineCount;
                    needSpace = false;
                    System.out.println();
                } else {
                    if (needSpace) {
                        System.out.print(" ");
                    }
                    System.out.print(toLower ? word.toLowerCase(Locale.FRENCH) : word);
                    needSpace = true;
                }
            }
        } catch (UnsupportedEncodingException e) {
            log.error(e);
        }

        final long elapsedNanos = System.nanoTime() - startNanos;
        final double elapsedSeconds = (double) elapsedNanos / 1.0E9;
        final double linesPerSec = (double) lineCount / elapsedSeconds;
        System.err.printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", lineCount, tokenCount, linesPerSec);
    }

    /**
     * Factory for {@link FrenchTokenizer} instances.
     *
     * <p>The factory is mutable: every call to {@link #setOptions(String)} (including
     * the one made by {@link #getTokenizer(Reader, String)}) updates the stored
     * configuration, and the change applies to all tokenizers created afterwards.
     *
     * @param <T> token type produced by the tokenizers
     */
    public static class FrenchTokenizerFactory<T extends HasWord>
    implements TokenizerFactory<T>,
    Serializable {
        private static final long serialVersionUID = 946818805507187330L;
        /** Token factory handed to every tokenizer created by this factory. */
        protected final LexedTokenFactory<T> factory;
        /** Options that are not handled here and are passed on to the lexer. */
        protected Properties lexerProperties = new Properties();
        /** Whether created tokenizers split compounds; off unless an option turns it on. */
        protected boolean splitCompoundOption = false;
        /**
         * Whether created tokenizers split contractions. Note that this is on unless an
         * option turns it off; {@code FrenchTokenizer.DEFAULT_OPTIONS} does turn it off.
         */
        protected boolean splitContractionOption = true;

        /**
         * Creates a CoreLabel factory configured with {@code FrenchTokenizer.DEFAULT_OPTIONS}.
         *
         * @return a new factory instance
         */
        public static TokenizerFactory<CoreLabel> newTokenizerFactory() {
            return new FrenchTokenizerFactory<CoreLabel>(new CoreLabelTokenFactory(), FrenchTokenizer.DEFAULT_OPTIONS);
        }

        /**
         * Creates a factory producing {@link Word} tokens.
         *
         * @param options comma-separated options, see {@link #setOptions(String)}
         * @return a new factory instance
         */
        public static TokenizerFactory<Word> newWordTokenizerFactory(String options) {
            return new FrenchTokenizerFactory<Word>(new WordTokenFactory(), options);
        }

        /** Creates a factory with the field defaults and no options applied. */
        private FrenchTokenizerFactory(LexedTokenFactory<T> factory) {
            this.factory = factory;
        }

        /** Creates a factory and applies {@code options} to it. */
        private FrenchTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            this(factory);
            this.setOptions(options);
        }

        /** Same as {@link #getTokenizer(Reader)}; a tokenizer is itself an iterator over tokens. */
        @Override
        public Iterator<T> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /**
         * Creates a tokenizer over {@code r} using the configuration currently stored
         * in this factory.
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new FrenchTokenizer<T>(r, this.factory, this.lexerProperties, this.splitCompoundOption, this.splitContractionOption);
        }

        /**
         * Merges a comma-separated option string into this factory's configuration.
         *
         * <p>Each option is either {@code name} (meaning {@code name=true}) or
         * {@code name=value}. "splitCompounds" and "splitContractions" set the
         * corresponding flags; every other option is stored in
         * {@link #lexerProperties}. Options that contain more than one '=' (or
         * nothing but '=') are reported on standard error and ignored.
         *
         * <p>A {@code null} string is treated as "no options". Whitespace around each
         * comma-separated option is ignored and empty options are skipped, so an empty
         * string no longer stores a property with an empty name.
         *
         * @param options comma-separated options; may be {@code null} or empty
         */
        @Override
        public void setOptions(String options) {
            if (options == null) {
                return;
            }
            for (String rawOption : options.split(",")) {
                final String option = rawOption.trim();
                if (option.isEmpty()) {
                    continue;
                }

                final String[] fields = option.split("=");
                if (fields.length != 1 && fields.length != 2) {
                    System.err.printf("%s: Bad option %s%n", this.getClass().getName(), option);
                    continue;
                }

                final String key = fields[0];
                // A bare option name is shorthand for name=true.
                final boolean bareFlag = fields.length == 1;
                if (key.equals(FrenchTokenizer.SPLIT_COMPOUNDS_OPTION)) {
                    this.splitCompoundOption = bareFlag || Boolean.parseBoolean(fields[1]);
                } else if (key.equals(FrenchTokenizer.SPLIT_CONTRACTIONS_OPTION)) {
                    this.splitContractionOption = bareFlag || Boolean.parseBoolean(fields[1]);
                } else if (bareFlag) {
                    this.lexerProperties.setProperty(option, "true");
                } else {
                    this.lexerProperties.setProperty(key, fields[1]);
                }
            }
        }

        /**
         * Applies {@code extraOptions} to this factory and then creates a tokenizer.
         * The extra options are stored in the factory and therefore also affect
         * tokenizers created by later calls.
         *
         * @param r source of characters
         * @param extraOptions comma-separated options, see {@link #setOptions(String)}
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r, String extraOptions) {
            this.setOptions(extraOptions);
            return this.getTokenizer(r);
        }
    }
}

