/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.tokenizer.spacy;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.spacy.SpacyBaseLanguage;
import ai.grazie.nlp.tokenizer.spacy.SpacyTokenInfo;
import ai.grazie.nlp.tokenizer.spacy.SpacyTokenizerFast;
import ai.grazie.nlp.tokenizer.spacy.SpacyTokenizerSpecialCases;
import ai.grazie.nlp.tokenizer.spacy.de.SpacyGerman;
import ai.grazie.nlp.tokenizer.spacy.de.SpacyGermanTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglish;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglishTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.ru.SpacyRussian;
import ai.grazie.nlp.tokenizer.spacy.ru.SpacyRussianTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.uk.SpacyUkrainian;
import ai.grazie.nlp.tokenizer.spacy.uk.SpacyUkrainianTokenizerExceptions;
import ai.grazie.nlp.tokenizer.utils.UtilsKt;
import ai.grazie.nlp.utils.RangesKt;
import ai.grazie.nlp.utils.normalization.AggregatedNormalizer;
import ai.grazie.nlp.utils.normalization.CapsNormalizer;
import ai.grazie.nlp.utils.normalization.Normalizer;
import ai.grazie.nlp.utils.normalization.UnicodePunctuationNormalizer;
import ai.grazie.text.Text;
import ai.grazie.text.TextRange;
import ai.grazie.utils.mpp.LoggerFactory;
import ai.grazie.utils.mpp.MPPLogger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.SetsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.ranges.IntProgression;
import kotlin.ranges.IntRange;
import kotlin.sequences.Sequence;
import kotlin.text.CharsKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u0000^\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\"\n\u0002\u0010\f\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\n\u0018\u0000 '2\u00020\u0001:\u0002'(B-\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\b\u001a\u00020\t\u00a2\u0006\u0002\u0010\nJ\u001e\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0013\u001a\u00020\u00142\u0006\u0010\u0015\u001a\u00020\u0016H\u0002J\u0016\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00190\u00182\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001b\u001a\u00020\u001c2\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001d\u001a\u00020\u001c2\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001e\u001a\u00020\u001f2\u0006\u0010 
\u001a\u00020\u0016H\u0002J\u0016\u0010!\u001a\b\u0012\u0004\u0012\u00020\u00160\u00112\u0006\u0010\"\u001a\u00020\u0016H\u0002J\u0016\u0010#\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010$\u001a\u00020\u0016H\u0016J\u0016\u0010%\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010$\u001a\u00020\u0016H\u0002J\u0016\u0010&\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0015\u001a\u00020\u0016H\u0002R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000e\u0010\u000fR\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0005\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006)"}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast;", "Lai/grazie/nlp/tokenizer/Tokenizer;", "prefix", "Lkotlin/text/Regex;", "infix", "suffix", "specialCases", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "preNormalizer", "Lai/grazie/nlp/utils/normalization/AggregatedNormalizer;", "(Lkotlin/text/Regex;Lkotlin/text/Regex;Lkotlin/text/Regex;Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;Lai/grazie/nlp/utils/normalization/AggregatedNormalizer;)V", "oneCharSuffixes", "", "", "getPrefix", "()Lkotlin/text/Regex;", "finalizeSplit", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "splits", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits;", "word", "", "findInfix", "Lkotlin/sequences/Sequence;", "Lkotlin/text/MatchResult;", "tok", "findPrefix", "", "findSuffix", "isSpecialOrFinal", "", "token", "splitInfixes", "remainingWord", "tokenize", "text", "tokenizeSpacyNormalized", "tokenizeToken", "Companion", "TokenSplits", "nlp-tokenizer"})
@SourceDebugExtension(value={"SMAP\nSpacyTokenizerFast.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast\n+ 2 _Strings.kt\nkotlin/text/StringsKt___StringsKt\n+ 3 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,354:1\n1083#2,2:355\n1064#2,2:357\n1064#2,2:359\n1064#2,2:376\n766#3:361\n857#3,2:362\n1549#3:364\n1620#3,3:365\n1549#3:368\n1620#3,3:369\n1549#3:372\n1620#3,3:373\n1855#3,2:378\n*S KotlinDebug\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast\n*L\n170#1:355,2\n182#1:357,2\n183#1:359,2\n286#1:376,2\n200#1:361\n200#1:362,2\n201#1:364\n201#1:365,3\n214#1:368\n214#1:369,3\n226#1:372\n226#1:373,3\n292#1:378,2\n*E\n"})
public final class SpacyTokenizerFast
implements Tokenizer {
    /** Singleton companion instance exposing the {@code load*} factory methods. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Regex supplied to the constructor as {@code prefix}; searched by {@code findPrefix}. */
    @NotNull
    private final Regex prefix;
    /** Regex supplied to the constructor as {@code infix}; searched by {@code findInfix}. */
    @NotNull
    private final Regex infix;
    /** Regex supplied to the constructor as {@code suffix}; searched by {@code findSuffix}. */
    @NotNull
    private final Regex suffix;
    /** Lookup of special-case words (and URL matching) consulted while splitting a word. */
    @NotNull
    private final SpacyTokenizerSpecialCases specialCases;
    /** Normalizer applied to the whole input text at the start of {@code tokenize}. */
    @NotNull
    private final AggregatedNormalizer preNormalizer;
    /** Single trailing characters that {@code tokenizeToken} tries to peel off first; filled in by the constructor. */
    @NotNull
    private final Set<Character> oneCharSuffixes;
    // NOTE(review): the logger name below says "SpacyTokenizer", not "SpacyTokenizerFast" - confirm this is intended.
    @NotNull
    private static final MPPLogger logger = LoggerFactory.INSTANCE.create("ai.grazie.nlp.tokenizer.spacy.SpacyTokenizer");

    /**
     * Creates a tokenizer from pre-compiled affix regexes, a special-case table and a
     * text normalizer.
     *
     * @param prefix regex used to strip leading pieces off a word
     * @param infix regex used to split the remaining word core
     * @param suffix regex used to strip trailing pieces off a word
     * @param specialCases2 special-case lookup consulted before and during splitting
     * @param preNormalizer normalizer applied to the input text before tokenization
     * @throws NullPointerException if any argument is {@code null}
     */
    public SpacyTokenizerFast(@NotNull Regex prefix, @NotNull Regex infix, @NotNull Regex suffix, @NotNull SpacyTokenizerSpecialCases specialCases2, @NotNull AggregatedNormalizer preNormalizer) {
        Intrinsics.checkNotNullParameter(prefix, "prefix");
        Intrinsics.checkNotNullParameter(infix, "infix");
        Intrinsics.checkNotNullParameter(suffix, "suffix");
        Intrinsics.checkNotNullParameter(specialCases2, "specialCases");
        Intrinsics.checkNotNullParameter(preNormalizer, "preNormalizer");
        this.prefix = prefix;
        this.infix = infix;
        this.suffix = suffix;
        this.specialCases = specialCases2;
        this.preNormalizer = preNormalizer;
        // Pass the characters as individual varargs: handing a single array to setOf(...)
        // from Java selects the one-element overload and yields a set containing the array.
        this.oneCharSuffixes = SetsKt.setOf('.', ':', ';', ',', '?', '!');
    }

    /**
     * Returns the prefix regex this tokenizer was constructed with.
     *
     * @return the {@code prefix} regex, never {@code null}
     */
    @NotNull
    public final Regex getPrefix() {
        return prefix;
    }

    /**
     * Tokenizes {@code text2}.
     *
     * <p>The text is normalized first; if normalization changes its length the problem is
     * logged and the raw text is tokenized instead, because the resulting ranges are applied
     * to the original text. After the whitespace/affix tokenization:
     * <ol>
     *   <li>a final token that ends with {@code '.'} and contains a letter is split into
     *       its head and the trailing period;</li>
     *   <li>directly adjacent tokens made only of {@code '?'} / {@code '!'} are merged;</li>
     *   <li>blank tokens are dropped, and every remaining token is rebuilt from the
     *       original (non-normalized) text using its range.</li>
     * </ol>
     *
     * @param text2 text to tokenize
     * @return tokens whose text is taken from {@code text2}
     * @throws NullPointerException if {@code text2} is {@code null}
     */
    @Override
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull String text2) {
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        String normalizedText = this.preNormalizer.normalize(text2);
        if (normalizedText.length() != text2.length()) {
            // tokenize.1.INSTANCE is the decompiler's name for the log-message lambda; kept verbatim.
            MPPLogger.error$default(logger, tokenize.1.INSTANCE, null, 2, null);
            normalizedText = text2;
        }
        List<Tokenizer.Token> normalizedTokens = this.tokenizeSpacyNormalized(normalizedText);
        if (!normalizedTokens.isEmpty()) {
            normalizedTokens = SpacyTokenizerFast.mergeQuestionExclamationRuns(SpacyTokenizerFast.splitTrailingPeriod(normalizedTokens));
        }
        List<Tokenizer.Token> result = new ArrayList<Tokenizer.Token>(normalizedTokens.size());
        for (Tokenizer.Token token : normalizedTokens) {
            if (StringsKt.isBlank((CharSequence)token.getToken())) {
                continue;
            }
            // Re-read the token text from the caller's text so normalization never leaks out.
            result.add(new Tokenizer.Token(StringsKt.substring((String)text2, (IntRange)token.getRange()), token.getRange()));
        }
        return result;
    }

    /** Splits a final token such as {@code "word."} into {@code "word"} and {@code "."}; expects a non-empty list. */
    @SuppressWarnings("unchecked")
    private static List<Tokenizer.Token> splitTrailingPeriod(List<Tokenizer.Token> tokens) {
        Tokenizer.Token lastToken = (Tokenizer.Token)CollectionsKt.last(tokens);
        CharSequence lastText = lastToken.getToken();
        if (!StringsKt.endsWith$default(lastText, '.', false, 2, null)) {
            return tokens;
        }
        boolean hasLetter = false;
        for (int i = 0; i < lastText.length(); ++i) {
            if (Character.isLetter(lastText.charAt(i))) {
                hasLetter = true;
                break;
            }
        }
        if (!hasLetter) {
            return tokens;
        }
        String head = lastToken.getToken().substring(0, lastToken.getToken().length() - 1);
        // Two explicit arguments force the varargs listOf overload.
        List<String> lastTokenParts = CollectionsKt.listOf(head, ".");
        return CollectionsKt.plus((Collection)tokens.subList(0, tokens.size() - 1), (Iterable)UtilsKt.splitToken(lastToken, lastTokenParts));
    }

    /** Merges neighbouring tokens that consist solely of '?'/'!' and touch each other in the text. */
    private static List<Tokenizer.Token> mergeQuestionExclamationRuns(List<Tokenizer.Token> tokens) {
        ArrayList<Tokenizer.Token> mergedTokens = new ArrayList<Tokenizer.Token>();
        for (Tokenizer.Token token : tokens) {
            if (mergedTokens.isEmpty()) {
                mergedTokens.add(token);
                continue;
            }
            Tokenizer.Token previous = mergedTokens.get(mergedTokens.size() - 1);
            boolean lastQE = SpacyTokenizerFast.isOnlyQuestionOrExclamation(previous.getToken());
            boolean currentQE = SpacyTokenizerFast.isOnlyQuestionOrExclamation(token.getToken());
            if (lastQE && currentQE && RangesKt.getCheckedEndExclusive(previous.getRange()) == token.getRange().getFirst()) {
                Tokenizer.Token combined = new Tokenizer.Token(new Text(previous.getToken() + token.getToken()), new TextRange(previous.getRange().getFirst(), RangesKt.getCheckedEndExclusive(token.getRange())));
                mergedTokens.set(mergedTokens.size() - 1, combined);
                continue;
            }
            mergedTokens.add(token);
        }
        return mergedTokens;
    }

    /** Returns true when every character is '?' or '!' (vacuously true for an empty sequence). */
    private static boolean isOnlyQuestionOrExclamation(CharSequence text) {
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (c != '?' && c != '!') {
                return false;
            }
        }
        return true;
    }

    /*
     * WARNING - void declaration
     */
    private final List<Tokenizer.Token> tokenizeSpacyNormalized(String text2) {
        Object object;
        Object destination$iv$iv;
        Iterable span;
        if (((CharSequence)text2).length() == 0) {
            return CollectionsKt.emptyList();
        }
        ArrayList result2 = new ArrayList();
        int start2 = 0;
        start2 = -1;
        int n = text2.length();
        for (int i = 0; i < n; ++i) {
            int i2 = i;
            char uc = text2.charAt(i);
            if (CharsKt.isWhitespace((char)uc)) {
                if (start2 != -1) {
                    void $this$mapTo$iv$iv;
                    Intrinsics.checkNotNullExpressionValue((Object)text2.substring(start2, i2), (String)"this as java.lang.String\u2026ing(startIndex, endIndex)");
                    Collection collection = result2;
                    Iterable $this$map$iv = this.tokenizeToken((String)((Object)span));
                    boolean $i$f$map = false;
                    Iterable iterable = $this$map$iv;
                    destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
                    boolean $i$f$mapTo = false;
                    for (Object item$iv$iv : $this$mapTo$iv$iv) {
                        void it;
                        Tokenizer.Token token = (Tokenizer.Token)item$iv$iv;
                        object = destination$iv$iv;
                        boolean bl = false;
                        object.add(new Tokenizer.Token(it.getToken(), RangesKt.withOffset(it.getRange(), start2)));
                    }
                    $this$map$iv = (List)destination$iv$iv;
                    CollectionsKt.addAll((Collection)collection, (Iterable)$this$map$iv);
                }
                start2 = -1;
                continue;
            }
            if (start2 != -1) continue;
            start2 = i2;
        }
        if (start2 != -1) {
            void $this$mapTo$iv$iv;
            String string2 = text2.substring(start2);
            Intrinsics.checkNotNullExpressionValue((Object)string2, (String)"this as java.lang.String).substring(startIndex)");
            String span2 = string2;
            Collection collection = result2;
            Iterable $this$map$iv = this.tokenizeToken(span2);
            boolean $i$f$map = false;
            span = $this$map$iv;
            Collection destination$iv$iv2 = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                destination$iv$iv = (Tokenizer.Token)item$iv$iv;
                object = destination$iv$iv2;
                boolean bl = false;
                object.add(new Tokenizer.Token(it.getToken(), RangesKt.withOffset(it.getRange(), start2)));
            }
            Iterable iterable = (List)destination$iv$iv2;
            CollectionsKt.addAll((Collection)collection, (Iterable)iterable);
        }
        return result2;
    }

    /**
     * Splits one whitespace-free word into prefix, core and suffix pieces.
     *
     * <p>Order of attempts:
     * <ol>
     *   <li>the word itself is special or all letters: emit it as is;</li>
     *   <li>the word ends with one of {@code oneCharSuffixes} and the rest is special or all
     *       letters: emit the rest plus that one-character suffix;</li>
     *   <li>repeatedly strip prefix-regex matches, then suffix-regex matches, stopping early
     *       as soon as the remainder is special or all letters;</li>
     *   <li>if the remainder matches a URL, emit it whole; otherwise split it on infixes.</li>
     * </ol>
     *
     * @param word a non-whitespace span of the input
     * @return the tokens produced by {@code TokenSplits.toList()}
     */
    private final List<Tokenizer.Token> tokenizeToken(String word) {
        TokenSplits splits = new TokenSplits();
        String remainingWord = word;
        if (this.isSpecialOrFinal(remainingWord)) {
            return this.finalizeSplit(splits, remainingWord);
        }
        // Fast path: peel off a single trailing punctuation character.
        if (this.oneCharSuffixes.contains(Character.valueOf(StringsKt.last((CharSequence)remainingWord)))) {
            int lastIndex = remainingWord.length() - 1;
            String minusSuffix = remainingWord.substring(0, lastIndex);
            if (this.isSpecialOrFinal(minusSuffix)) {
                splits.getSuffixes().add(remainingWord.substring(lastIndex));
                return this.finalizeSplit(splits, minusSuffix);
            }
        }
        while (remainingWord.length() > 0) {
            if (this.isSpecialOrFinal(remainingWord)) {
                return this.finalizeSplit(splits, remainingWord);
            }
            int prefixLength = this.findPrefix(remainingWord);
            if (prefixLength == 0) {
                break;
            }
            splits.getPrefixes().add(remainingWord.substring(0, prefixLength));
            // Advance past the stripped prefix; without this the loop never terminates.
            remainingWord = remainingWord.substring(prefixLength);
        }
        while (remainingWord.length() > 0) {
            if (this.isSpecialOrFinal(remainingWord)) {
                return this.finalizeSplit(splits, remainingWord);
            }
            int suffixLength = this.findSuffix(remainingWord);
            if (suffixLength == 0) {
                break;
            }
            int cut = remainingWord.length() - suffixLength;
            splits.getSuffixes().add(remainingWord.substring(cut));
            // Drop the stripped suffix; without this the loop never terminates.
            remainingWord = remainingWord.substring(0, cut);
        }
        if (this.isSpecialOrFinal(remainingWord) || this.specialCases.urlMatch(remainingWord)) {
            return this.finalizeSplit(splits, remainingWord);
        }
        splits.getWordTokens().addAll((Collection<String>)this.splitInfixes(remainingWord));
        return splits.toList();
    }

    /**
     * Tells whether {@code token} needs no further affix splitting: it either has an entry
     * in the special-case table or consists only of letters (an empty string counts as
     * all letters).
     *
     * @param token candidate word
     * @return {@code true} if the token is special or purely alphabetic
     */
    private final boolean isSpecialOrFinal(String token) {
        return this.specialCases.get(token) != null
                || token.chars().allMatch(Character::isLetter);
    }

    /**
     * Records {@code word} as the core of {@code splits} and converts the splits to tokens.
     * When the special-case table has an entry for {@code word}, the {@code orth} of each
     * entry element is recorded instead of the word itself.
     *
     * @param splits accumulator holding prefixes/suffixes collected so far
     * @param word remaining core of the word
     * @return result of {@code splits.toList()}
     */
    private final List<Tokenizer.Token> finalizeSplit(TokenSplits splits, String word) {
        List<SpacyTokenInfo> special = this.specialCases.get(word);
        if (special == null) {
            splits.getWordTokens().add(word);
            return splits.toList();
        }
        for (SpacyTokenInfo info : special) {
            splits.getWordTokens().add(info.getOrth());
        }
        return splits.toList();
    }

    /**
     * Returns the length of the first match of the prefix regex in {@code tok}, or 0 when
     * the regex does not match.
     * NOTE(review): the caller cuts this many characters from the start of the word, which
     * presumably relies on the regex being anchored at the beginning - confirm.
     */
    private final int findPrefix(String tok) {
        MatchResult hit = Regex.find$default((Regex)this.prefix, (CharSequence)tok, (int)0, (int)2, null);
        if (hit == null) {
            return 0;
        }
        IntProgression span = (IntProgression)hit.getRange();
        return kotlin.ranges.RangesKt.last(span) + 1 - kotlin.ranges.RangesKt.first(span);
    }

    /**
     * Returns the length of the first match of the suffix regex in {@code tok}, or 0 when
     * the regex does not match.
     * NOTE(review): the caller cuts this many characters from the end of the word, which
     * presumably relies on the regex being anchored at the end - confirm.
     */
    private final int findSuffix(String tok) {
        MatchResult hit = Regex.find$default((Regex)this.suffix, (CharSequence)tok, (int)0, (int)2, null);
        if (hit == null) {
            return 0;
        }
        IntProgression span = (IntProgression)hit.getRange();
        return kotlin.ranges.RangesKt.last(span) + 1 - kotlin.ranges.RangesKt.first(span);
    }

    /**
     * Splits {@code remainingWord} around every infix-regex match. Both the text between
     * matches and the matched infixes themselves are returned, in order; empty pieces are
     * omitted. A match starting at index 0 is ignored.
     *
     * @param remainingWord word core left after prefix/suffix stripping
     * @return the pieces of the word in left-to-right order
     */
    private final List<String> splitInfixes(String remainingWord) {
        List<String> tokens = new ArrayList<String>();
        int start2 = 0;
        // A Kotlin Sequence is not a java.lang.Iterable, so walk its iterator explicitly.
        Iterator<MatchResult> matches = this.findInfix(remainingWord).iterator();
        while (matches.hasNext()) {
            IntRange range = matches.next().getRange();
            int startInfix = range.getFirst();
            int endInfix = range.getLast() + 1;
            if (startInfix == 0) {
                continue;
            }
            if (startInfix != start2) {
                // Text between the previous infix (or word start) and this infix.
                tokens.add(remainingWord.substring(start2, startInfix));
            }
            if (startInfix != endInfix) {
                // The infix itself, unless the match is empty.
                tokens.add(remainingWord.substring(startInfix, endInfix));
            }
            start2 = endInfix;
        }
        String span = remainingWord.substring(start2);
        if (span.length() > 0) {
            tokens.add(span);
        }
        return tokens;
    }

    /**
     * Finds all matches of the infix regex in {@code tok}, searching from the default
     * start index.
     *
     * @param tok text to search
     * @return sequence of matches as produced by {@code Regex.findAll}
     */
    private final Sequence<MatchResult> findInfix(String tok) {
        CharSequence input = tok;
        return Regex.findAll$default(this.infix, input, 0, 2, null);
    }

    @Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u00000\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000b\n\u0000\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0006\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J(\u0010\u0005\u001a\u00020\u00062\u0006\u0010\u0007\u001a\u00020\b2\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fJ*\u0010\u000e\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f2\b\b\u0002\u0010\u000f\u001a\u00020\nJ*\u0010\u0010\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f2\b\b\u0002\u0010\u000f\u001a\u00020\nJ \u0010\u0011\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fJ \u0010\u0012\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fR\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0013"}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$Companion;", "", "()V", "logger", "Lai/grazie/utils/mpp/MPPLogger;", "load", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast;", "language", "Lai/grazie/nlp/langs/Language;", "replaceApostropheWithQuote", "", "additionalPrefixes", "", "", "loadEnglish", "useNewAffixes", "loadGerman", "loadRussian", "loadUkrainian", "nlp-tokenizer"})
    public static final class Companion {
        /** Private no-op constructor; the synthetic marker constructor of this class delegates here. */
        private Companion() {
            // nothing to initialize
        }
        /**
         * Builds a tokenizer for {@code language} by dispatching to the matching
         * language-specific loader. The English and German loaders are called with
         * {@code useNewAffixes = false}.
         *
         * @param language language to load; mapped through {@code WhenMappings} (index 1 is ENGLISH)
         * @param replaceApostropheWithQuote forwarded to the language loader
         * @param additionalPrefixes extra prefix patterns forwarded to the language loader
         * @return the tokenizer for the language
         * @throws IllegalStateException if the language has no loader
         */
        @NotNull
        public final SpacyTokenizerFast load(@NotNull Language language, boolean replaceApostropheWithQuote, @NotNull List<String> additionalPrefixes) {
            Intrinsics.checkNotNullParameter((Object)((Object)language), (String)"language");
            Intrinsics.checkNotNullParameter(additionalPrefixes, (String)"additionalPrefixes");
            switch (WhenMappings.$EnumSwitchMapping$0[language.ordinal()]) {
                case 1:
                    return this.loadEnglish(replaceApostropheWithQuote, additionalPrefixes, false);
                case 2:
                    return this.loadGerman(replaceApostropheWithQuote, additionalPrefixes, false);
                case 3:
                    return this.loadRussian(replaceApostropheWithQuote, additionalPrefixes);
                case 4:
                    return this.loadUkrainian(replaceApostropheWithQuote, additionalPrefixes);
                default:
                    throw new IllegalStateException("Language is not supported");
            }
        }

        /**
         * Synthetic default-argument bridge for {@code load}: bit 2 of {@code n} replaces
         * {@code bl} with {@code false}, bit 4 replaces {@code list2} with an empty list.
         */
        public static /* synthetic */ SpacyTokenizerFast load$default(Companion companion, Language language, boolean bl, List list2, int n, Object object) {
            boolean replaceApostrophe = (n & 2) == 0 && bl;
            List prefixes = (n & 4) == 0 ? list2 : CollectionsKt.emptyList();
            return companion.load(language, replaceApostrophe, prefixes);
        }

        /**
         * Builds the English tokenizer.
         *
         * <p>Prefixes are the English prefixes plus {@code additionalPrefixes}. Infixes and
         * suffixes are the English base lists, extended with the "new" lists when
         * {@code useNewAffixes} is set. Special cases combine the base and English exceptions.
         *
         * @param replaceApostropheWithQuote passed to {@code UnicodePunctuationNormalizer}
         * @param additionalPrefixes extra prefix patterns appended to the English ones
         * @param useNewAffixes whether to append the new infix/suffix lists
         * @return the configured tokenizer
         */
        @NotNull
        public final SpacyTokenizerFast loadEnglish(boolean replaceApostropheWithQuote, @NotNull List<String> additionalPrefixes, boolean useNewAffixes) {
            Intrinsics.checkNotNullParameter(additionalPrefixes, (String)"additionalPrefixes");
            AggregatedNormalizer preNormalizer = new AggregatedNormalizer(new Normalizer[]{new UnicodePunctuationNormalizer(replaceApostropheWithQuote), new CapsNormalizer()});

            List prefixes = SpacyEnglish.INSTANCE.getPrefixes();
            if (!additionalPrefixes.isEmpty()) {
                prefixes = CollectionsKt.plus((Collection)prefixes, (Iterable)additionalPrefixes);
            }
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(prefixes);

            Collection baseInfixes = (Collection)SpacyEnglish.INSTANCE.getInfixes();
            Iterable extraInfixes = (Iterable)(useNewAffixes ? SpacyEnglish.INSTANCE.getNewInfixes() : CollectionsKt.emptyList());
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(CollectionsKt.plus(baseInfixes, extraInfixes));

            Collection baseSuffixes = (Collection)SpacyEnglish.INSTANCE.getSuffixes();
            Iterable extraSuffixes = (Iterable)(useNewAffixes ? SpacyEnglish.INSTANCE.getNewSuffixes() : CollectionsKt.emptyList());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(CollectionsKt.plus(baseSuffixes, extraSuffixes));

            SpacyTokenizerSpecialCases specialCases = new SpacyTokenizerSpecialCases((Map<String, ? extends List<SpacyTokenInfo>>)SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(), (Map<String, ? extends List<SpacyTokenInfo>>)SpacyEnglishTokenizerExceptions.INSTANCE.getExceptions());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, specialCases, preNormalizer);
        }

        /**
         * Synthetic default-argument bridge for {@code loadEnglish}: bits 1, 2 and 4 of
         * {@code n} replace {@code bl}, {@code list2} and {@code bl2} with {@code false},
         * an empty list and {@code false} respectively.
         */
        public static /* synthetic */ SpacyTokenizerFast loadEnglish$default(Companion companion, boolean bl, List list2, boolean bl2, int n, Object object) {
            boolean replaceApostrophe = (n & 1) == 0 && bl;
            List prefixes = (n & 2) == 0 ? list2 : CollectionsKt.emptyList();
            boolean newAffixes = (n & 4) == 0 && bl2;
            return companion.loadEnglish(replaceApostrophe, prefixes, newAffixes);
        }

        /**
         * Builds the German tokenizer.
         *
         * <p>Prefixes are the German prefixes plus {@code additionalPrefixes}. When
         * {@code useNewAffixes} is set the new infix list is used <em>instead of</em> the
         * base infix list; suffixes are always the base German suffixes.
         *
         * @param replaceApostropheWithQuote passed to {@code UnicodePunctuationNormalizer}
         * @param additionalPrefixes extra prefix patterns appended to the German ones
         * @param useNewAffixes whether to use the new infix list
         * @return the configured tokenizer
         */
        @NotNull
        public final SpacyTokenizerFast loadGerman(boolean replaceApostropheWithQuote, @NotNull List<String> additionalPrefixes, boolean useNewAffixes) {
            Intrinsics.checkNotNullParameter(additionalPrefixes, (String)"additionalPrefixes");
            AggregatedNormalizer preNormalizer = new AggregatedNormalizer(new Normalizer[]{new UnicodePunctuationNormalizer(replaceApostropheWithQuote), new CapsNormalizer()});

            List prefixes = SpacyGerman.INSTANCE.getPrefixes();
            if (!additionalPrefixes.isEmpty()) {
                prefixes = CollectionsKt.plus((Collection)prefixes, (Iterable)additionalPrefixes);
            }
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(prefixes);
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(useNewAffixes ? SpacyGerman.INSTANCE.getNewInfixes() : SpacyGerman.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyGerman.INSTANCE.getSuffixes());

            SpacyTokenizerSpecialCases specialCases = new SpacyTokenizerSpecialCases((Map<String, ? extends List<SpacyTokenInfo>>)SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(), SpacyGermanTokenizerExceptions.INSTANCE.getExceptions());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, specialCases, preNormalizer);
        }

        /**
         * Synthetic default-argument bridge for {@code loadGerman}: bits 1, 2 and 4 of
         * {@code n} replace {@code bl}, {@code list2} and {@code bl2} with {@code false},
         * an empty list and {@code false} respectively.
         */
        public static /* synthetic */ SpacyTokenizerFast loadGerman$default(Companion companion, boolean bl, List list2, boolean bl2, int n, Object object) {
            boolean replaceApostrophe = (n & 1) == 0 && bl;
            List prefixes = (n & 2) == 0 ? list2 : CollectionsKt.emptyList();
            boolean newAffixes = (n & 4) == 0 && bl2;
            return companion.loadGerman(replaceApostrophe, prefixes, newAffixes);
        }

        /**
         * Builds the Russian tokenizer from the Russian prefix (plus
         * {@code additionalPrefixes}), infix and suffix lists and the base + Russian
         * tokenizer exceptions.
         *
         * @param replaceApostropheWithQuote passed to {@code UnicodePunctuationNormalizer}
         * @param additionalPrefixes extra prefix patterns appended to the Russian ones
         * @return the configured tokenizer
         */
        @NotNull
        public final SpacyTokenizerFast loadRussian(boolean replaceApostropheWithQuote, @NotNull List<String> additionalPrefixes) {
            Intrinsics.checkNotNullParameter(additionalPrefixes, (String)"additionalPrefixes");
            AggregatedNormalizer preNormalizer = new AggregatedNormalizer(new Normalizer[]{new UnicodePunctuationNormalizer(replaceApostropheWithQuote), new CapsNormalizer()});

            List prefixes = SpacyRussian.INSTANCE.getPrefixes();
            if (!additionalPrefixes.isEmpty()) {
                prefixes = CollectionsKt.plus((Collection)prefixes, (Iterable)additionalPrefixes);
            }
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(prefixes);
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(SpacyRussian.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyRussian.INSTANCE.getSuffixes());

            SpacyTokenizerSpecialCases specialCases = new SpacyTokenizerSpecialCases((Map<String, ? extends List<SpacyTokenInfo>>)SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(), SpacyRussianTokenizerExceptions.INSTANCE.getExceptions());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, specialCases, preNormalizer);
        }

        /**
         * Synthetic default-argument bridge for {@code loadRussian}: bit 1 of {@code n}
         * replaces {@code bl} with {@code false}, bit 2 replaces {@code list2} with an
         * empty list.
         */
        public static /* synthetic */ SpacyTokenizerFast loadRussian$default(Companion companion, boolean bl, List list2, int n, Object object) {
            boolean replaceApostrophe = (n & 1) == 0 && bl;
            List prefixes = (n & 2) == 0 ? list2 : CollectionsKt.emptyList();
            return companion.loadRussian(replaceApostrophe, prefixes);
        }

        /**
         * Builds the Ukrainian tokenizer from the Ukrainian prefix (plus
         * {@code additionalPrefixes}), infix and suffix lists and the base + Ukrainian
         * tokenizer exceptions.
         *
         * @param replaceApostropheWithQuote passed to {@code UnicodePunctuationNormalizer}
         * @param additionalPrefixes extra prefix patterns appended to the Ukrainian ones
         * @return the configured tokenizer
         */
        @NotNull
        public final SpacyTokenizerFast loadUkrainian(boolean replaceApostropheWithQuote, @NotNull List<String> additionalPrefixes) {
            Intrinsics.checkNotNullParameter(additionalPrefixes, (String)"additionalPrefixes");
            AggregatedNormalizer preNormalizer = new AggregatedNormalizer(new Normalizer[]{new UnicodePunctuationNormalizer(replaceApostropheWithQuote), new CapsNormalizer()});

            List prefixes = SpacyUkrainian.INSTANCE.getPrefixes();
            if (!additionalPrefixes.isEmpty()) {
                prefixes = CollectionsKt.plus((Collection)prefixes, (Iterable)additionalPrefixes);
            }
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(prefixes);
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(SpacyUkrainian.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyUkrainian.INSTANCE.getSuffixes());

            SpacyTokenizerSpecialCases specialCases = new SpacyTokenizerSpecialCases((Map<String, ? extends List<SpacyTokenInfo>>)SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(), SpacyUkrainianTokenizerExceptions.INSTANCE.getExceptions());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, specialCases, preNormalizer);
        }

        /**
         * Synthetic bridge that fills in omitted arguments before delegating to
         * {@link #loadUkrainian(boolean, List)}.
         *
         * <p>{@code n} is a bit mask of omitted arguments: bit 0 replaces {@code bl} with
         * {@code false} and bit 1 replaces {@code list2} with an empty list. {@code object} is
         * not read.
         *
         * @param companion receiver on which {@code loadUkrainian} is invoked
         * @param bl        value for {@code replaceApostropheWithQuote} when bit 0 of {@code n} is clear
         * @param list2     value for {@code additionalPrefixes} when bit 1 of {@code n} is clear
         * @param n         bit mask selecting which arguments fall back to their defaults
         * @param object    unused
         * @return the tokenizer produced by {@code loadUkrainian}
         */
        public static /* synthetic */ SpacyTokenizerFast loadUkrainian$default(Companion companion, boolean bl, List list2, int n, Object object) {
            // A set mask bit means "argument omitted".
            boolean replaceApostrophe = (n & 1) == 0 && bl;
            List extraPrefixes = (n & 2) == 0 ? list2 : CollectionsKt.emptyList();
            return companion.loadUkrainian(replaceApostrophe, extraPrefixes);
        }

        /**
         * Synthetic constructor: the marker argument is never read and construction is
         * delegated straight to the no-argument constructor.
         *
         * @param $constructor_marker ignored; only distinguishes this overload from {@code Companion()}
         */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }

        /**
         * Lookup table from {@link Language} ordinals to small case numbers.
         *
         * <p>{@code $EnumSwitchMapping$0} is indexed by {@code Language.ordinal()} and holds
         * 1 for ENGLISH, 2 for GERMAN, 3 for RUSSIAN and 4 for UKRAINIAN; every other slot keeps
         * the array default of 0. Presumably consumed by a switch over {@code Language}
         * elsewhere in this file (not visible from this class).
         */
        @Metadata(mv={1, 7, 0}, k=3, xi=48)
        public final class WhenMappings {
            public static final /* synthetic */ int[] $EnumSwitchMapping$0;

            static {
                int[] caseByOrdinal = new int[Language.values().length];
                // Each constant is registered in its own try block so that one constant
                // missing at link time does not prevent the others from being mapped.
                try {
                    caseByOrdinal[Language.ENGLISH.ordinal()] = 1;
                }
                catch (NoSuchFieldError ignored) {
                    // deliberately ignored: the slot for this constant stays 0
                }
                try {
                    caseByOrdinal[Language.GERMAN.ordinal()] = 2;
                }
                catch (NoSuchFieldError ignored) {
                    // deliberately ignored: the slot for this constant stays 0
                }
                try {
                    caseByOrdinal[Language.RUSSIAN.ordinal()] = 3;
                }
                catch (NoSuchFieldError ignored) {
                    // deliberately ignored: the slot for this constant stays 0
                }
                try {
                    caseByOrdinal[Language.UKRAINIAN.ordinal()] = 4;
                }
                catch (NoSuchFieldError ignored) {
                    // deliberately ignored: the slot for this constant stays 0
                }
                $EnumSwitchMapping$0 = caseByOrdinal;
            }
        }
    }

    @Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u0000&\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\b\u0002\u0018\u00002\u00020\u0001B\u0005\u00a2\u0006\u0002\u0010\u0002J\f\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u000f0\u000eR!\u0010\u0003\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR!\u0010\t\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006\u00a2\u0006\b\n\u0000\u001a\u0004\b\n\u0010\bR!\u0010\u000b\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006\u00a2\u0006\b\n\u0000\u001a\u0004\b\f\u0010\b\u00a8\u0006\u0010"}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits;", "", "()V", "prefixes", "Ljava/util/ArrayList;", "", "Lkotlin/collections/ArrayList;", "getPrefixes", "()Ljava/util/ArrayList;", "suffixes", "getSuffixes", "wordTokens", "getWordTokens", "toList", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "nlp-tokenizer"})
    @SourceDebugExtension(value={"SMAP\nSpacyTokenizerFast.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,354:1\n1549#2:355\n1620#2,3:356\n*S KotlinDebug\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits\n*L\n148#1:355\n148#1:356,3\n*E\n"})
    private static final class TokenSplits {
        @NotNull
        private final ArrayList<String> prefixes = new ArrayList();
        @NotNull
        private final ArrayList<String> suffixes = new ArrayList();
        @NotNull
        private final ArrayList<String> wordTokens = new ArrayList();

        @NotNull
        public final ArrayList<String> getPrefixes() {
            return this.prefixes;
        }

        @NotNull
        public final ArrayList<String> getSuffixes() {
            return this.suffixes;
        }

        @NotNull
        public final ArrayList<String> getWordTokens() {
            return this.wordTokens;
        }

        /*
         * WARNING - void declaration
         */
        @NotNull
        public final List<Tokenizer.Token> toList() {
            void $this$mapTo$iv$iv;
            int cur = 0;
            Iterable $this$map$iv = CollectionsKt.plus((Collection)CollectionsKt.plus((Collection)this.prefixes, (Iterable)this.wordTokens), (Iterable)CollectionsKt.reversed((Iterable)this.suffixes));
            boolean $i$f$map = false;
            Iterable iterable = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string2 = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl = false;
                int start2 = cur;
                collection.add(new Tokenizer.Token((String)it, new IntRange(start2, (cur += it.length()) - 1)));
            }
            return (List)destination$iv$iv;
        }
    }
}

