/*
 * Decompiled with CFR 0.152.
 */
package org.htmlcleaner;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.htmlcleaner.BaseToken;
import org.htmlcleaner.CData;
import org.htmlcleaner.CleanTimeValues;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CleanerTransformations;
import org.htmlcleaner.CommentNode;
import org.htmlcleaner.ContentNode;
import org.htmlcleaner.DoctypeToken;
import org.htmlcleaner.EndTagToken;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagInfo;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.TagToken;
import org.htmlcleaner.Utils;

/**
 * Splits HTML text read from a {@link Reader} into tokens (tags, end tags, content,
 * comments, CDATA sections and the doctype).
 *
 * <p>Input is consumed through a fixed-size sliding window ({@link #WORKING_BUFFER_SIZE}
 * chars). Every token is appended to the token list and immediately handed to
 * {@link HtmlCleaner#makeTree}, so tokenizing and tree building are interleaved.
 *
 * <p>The class keeps mutable cursor state in its fields and is not written for
 * concurrent use.
 */
public class HtmlTokenizer {
    /** Size, in chars, of the sliding window over the input. */
    private static final int WORKING_BUFFER_SIZE = 1024;

    /** Maximum number of chars {@link #containsEndCData()} looks ahead past the window. */
    private static final int CDATA_LOOKAHEAD_LIMIT = 524288;

    private static final String CDATA_START = "<![CDATA[";
    private static final String SAFE_CDATA_START = "/*<![CDATA[*/";
    private static final String SAFE_CDATA_START_ALT = "//<![CDATA[";
    private static final String CDATA_END = "]]>";
    private static final String SAFE_CDATA_END = "/*]]>*/";
    private static final String SAFE_CDATA_END_ALT = "//]]>";

    private BufferedReader _reader;
    private char[] _working = new char[WORKING_BUFFER_SIZE];
    /** Index of the current char inside {@link #_working}. */
    private transient int _pos;
    /** Number of valid chars in {@link #_working} once the reader is exhausted; -1 until then. */
    private transient int _len = -1;
    private transient int _row = 1;
    private transient int _col = 1;
    /** Chars consumed for the token currently being assembled. */
    private transient StringBuilder _saved = new StringBuilder(512);
    private transient boolean _isLateForDoctype;
    private transient DoctypeToken _docType;
    private transient TagToken _currentTagToken;
    private transient List<BaseToken> _tokenList = new ArrayList<BaseToken>();
    private transient Set<String> _namespacePrefixes = new HashSet<String>();
    /** Set to false by {@link #identifier(boolean)} when no identifier starts at the cursor. */
    private boolean _asExpected = true;
    /** True while inside a tag for which {@code props.isUseCdataFor(name)} returned true. */
    private boolean _isSpecialContext;
    private String _isSpecialContextName;
    private HtmlCleaner cleaner;
    private CleanerProperties props;
    private CleanerTransformations transformations;
    private CleanTimeValues cleanTimeValues;

    /**
     * Creates a tokenizer over {@code reader}.
     *
     * @param htmlCleaner cleaner supplying properties, transformations and the tree builder
     * @param reader source of the HTML text; it is wrapped in a {@link BufferedReader}
     * @param cleanTimeValues per-run state passed through to the cleaner
     */
    public HtmlTokenizer(HtmlCleaner htmlCleaner, Reader reader, CleanTimeValues cleanTimeValues) {
        this._reader = new BufferedReader(reader);
        this.cleaner = htmlCleaner;
        this.props = htmlCleaner.getProperties();
        this.transformations = htmlCleaner.getTransformations();
        this.cleanTimeValues = cleanTimeValues;
    }

    /**
     * Stamps the token with the current row/column, appends it to the token list and
     * asks the cleaner to process the list starting at the new token.
     */
    private void addToken(BaseToken baseToken) {
        baseToken.setRow(_row);
        baseToken.setCol(_col);
        _tokenList.add(baseToken);
        cleaner.makeTree(_tokenList, _tokenList.listIterator(_tokenList.size() - 1), cleanTimeValues);
    }

    /**
     * Refills the window when fewer than {@code neededChars} chars remain after the cursor
     * and the reader is not yet exhausted. The unread tail is moved to the front of the
     * buffer and {@link #_pos} is reset to 0.
     *
     * @param neededChars number of chars the caller wants available from the cursor
     * @throws IOException if the underlying reader fails
     */
    private void readIfNeeded(int neededChars) throws IOException {
        if (_len == -1 && _pos + neededChars >= WORKING_BUFFER_SIZE) {
            int kept = WORKING_BUFFER_SIZE - _pos;
            System.arraycopy(_working, _pos, _working, 0, kept);
            _pos = 0;

            int offset = kept;
            int remaining = WORKING_BUFFER_SIZE - kept;
            while (remaining > 0) {
                int count = _reader.read(_working, offset, remaining);
                if (count < 0) {
                    break;
                }
                offset += count;
                remaining -= count;
            }
            if (remaining > 0) {
                // The reader ran dry before the window was full: remember where the data ends.
                _len = offset;
            }

            // Replace NUL with U+FFFD and every other char up to ' ' (except LF/CR) with a space.
            int limit = _len >= 0 ? _len : WORKING_BUFFER_SIZE;
            for (int i = 0; i < limit; ++i) {
                char ch = _working[i];
                if (ch == '\u0000') {
                    _working[i] = '\uFFFD';
                } else if (ch <= ' ' && ch != '\n' && ch != '\r') {
                    _working[i] = ' ';
                }
            }
        }
    }

    /** Returns the live list of tokens produced so far. */
    List<BaseToken> getTokenList() {
        return _tokenList;
    }

    /** Returns the live set of lower-cased namespace prefixes seen in identifiers. */
    Set<String> getNamespacePrefixes() {
        return _namespacePrefixes;
    }

    /** Advances the cursor by one char. */
    private void go() throws IOException {
        go(1);
    }

    /**
     * Moves the cursor by {@code step} chars (may be zero or negative) and refills the
     * window if required. A cursor that would become negative is clamped to 0.
     */
    private void go(int step) throws IOException {
        _pos += step;
        readIfNeeded(step - 1);
        if (_pos < 0) {
            _pos = 0;
        }
    }

    /**
     * Tells whether the input at the cursor starts with {@code string}, comparing
     * chars case-insensitively via {@link Character#toLowerCase(char)}.
     */
    private boolean startsWith(String string) throws IOException {
        int length = string.length();
        readIfNeeded(length);
        if (_len >= 0 && _pos + length > _len) {
            return false;
        }
        for (int i = 0; i < length; ++i) {
            char expected = Character.toLowerCase(string.charAt(i));
            char actual = Character.toLowerCase(_working[_pos + i]);
            if (expected != actual) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether the input at the cursor starts with any of the three CDATA openers. */
    private boolean startsWithCDataStart() throws IOException {
        return startsWith(SAFE_CDATA_START) || startsWith(CDATA_START) || startsWith(SAFE_CDATA_START_ALT);
    }

    /** Tells whether the input at the cursor starts with any of the three CDATA terminators. */
    private boolean startsWithCDataEnd() throws IOException {
        return startsWith(SAFE_CDATA_END) || startsWith(CDATA_END) || startsWith(SAFE_CDATA_END_ALT);
    }

    /** Tells whether the char at {@code position} exists and is whitespace. */
    private boolean isWhitespace(int position) {
        if (_len >= 0 && position >= _len) {
            return false;
        }
        return Character.isWhitespace(_working[position]);
    }

    private boolean isWhitespace() {
        return isWhitespace(_pos);
    }

    /** Tells whether the char at {@code position} exists and equals {@code ch}, ignoring case. */
    private boolean isChar(int position, char ch) {
        if (_len >= 0 && position >= _len) {
            return false;
        }
        return Character.toLowerCase(ch) == Character.toLowerCase(_working[position]);
    }

    private boolean isChar(char ch) {
        return isChar(_pos, ch);
    }

    /** Tells whether the char at {@code position} exists and may start a Unicode identifier. */
    private boolean isElementIdentifierStartChar(int position) {
        if (_len >= 0 && position >= _len) {
            return false;
        }
        return Character.isUnicodeIdentifierStart(_working[position]);
    }

    /** Like {@link #isHtmlAttributeIdentifierChar()} but additionally rejects {@code '<'}. */
    private boolean isHtmlAttributeIdentifierStartChar() {
        if (_working[_pos] == '<') {
            return false;
        }
        return isHtmlAttributeIdentifierChar();
    }

    private boolean isHtmlAttributeIdentifierChar() {
        return isHtmlAttributeIdentifierChar(_pos);
    }

    private boolean isHtmlElementIdentifier() {
        return isHtmlElementIdentifier(_pos);
    }

    /**
     * Tells whether the char at {@code position} may be part of an element name: it must be
     * an attribute-identifier char that is neither {@code '<'} nor a Unicode space char.
     */
    private boolean isHtmlElementIdentifier(int position) {
        if (!isHtmlAttributeIdentifierChar(position)) {
            return false;
        }
        // The call above already guarantees the position is in range and that the char is
        // not whitespace, '>' or '/', so only the remaining exclusions are tested here.
        char ch = _working[position];
        return ch != '<' && !Character.isSpaceChar(ch);
    }

    /**
     * Tells whether the char at {@code position} may be part of an attribute name: it must
     * exist, be a defined non-control, non-whitespace char and none of
     * {@code NUL U+FFFD " ' > / =}.
     */
    private boolean isHtmlAttributeIdentifierChar(int position) {
        if (_len >= 0 && position >= _len) {
            return false;
        }
        char ch = _working[position];
        if (Character.isWhitespace(ch)) {
            return false;
        }
        switch (ch) {
            case '\u0000':
            case '\uFFFD':
            case '"':
            case '\'':
            case '>':
            case '/':
            case '=':
                return false;
            default:
                break;
        }
        if (Character.isISOControl(ch)) {
            return false;
        }
        return Character.isDefined(ch);
    }

    /** Tells whether the reader is exhausted and the cursor is past the last valid char. */
    private boolean isAllRead() {
        return _len >= 0 && _pos >= _len;
    }

    /** Appends {@code ch} to the pending text and updates the row/column counters. */
    private void save(char ch) {
        updateCoordinates(ch);
        _saved.append(ch);
    }

    /** Advances the row on a line feed (resetting the column to 1), otherwise the column. */
    private void updateCoordinates(char ch) {
        if (ch == '\n') {
            ++_row;
            _col = 1;
        } else {
            ++_col;
        }
    }

    /** Saves the char under the cursor, if there is one. */
    private void saveCurrent() {
        if (!isAllRead()) {
            save(_working[_pos]);
        }
    }

    /**
     * Saves up to {@code count} chars starting at the cursor without moving the cursor.
     * Stops early at the end of the input so that no stale buffer content is saved.
     */
    private void saveCurrent(int count) throws IOException {
        readIfNeeded(count);
        int index = _pos;
        while (count > 0 && (_len < 0 || index < _len)) {
            save(_working[index]);
            ++index;
            --count;
        }
    }

    /** Saves and steps over consecutive whitespace chars at the cursor. */
    private void skipWhitespaces() throws IOException {
        while (!isAllRead() && isWhitespace()) {
            saveCurrent();
            go();
        }
    }

    /**
     * Emits the pending text as a {@link ContentNode} (deserializing entities when the
     * properties ask for it) and clears it.
     *
     * @return true if a token was added, false if there was no pending text
     */
    private boolean addSavedAsContent() {
        if (_saved.length() == 0) {
            return false;
        }
        String text = props.isDeserializeEntities()
                ? Utils.deserializeEntities(_saved.toString(), props.isRecognizeUnicodeChars())
                : _saved.toString();
        addToken(new ContentNode(text));
        _saved.setLength(0);
        return true;
    }

    /**
     * Tokenizes the whole input. Tokens and namespace prefixes collected by a previous
     * run are discarded first. If the current thread is found interrupted, both
     * collections are cleared and the method returns early.
     *
     * <p>The reader is closed when this method exits, whether normally or by exception.
     *
     * @throws IOException if reading or closing the underlying reader fails
     */
    void start() throws IOException {
        _currentTagToken = null;
        _tokenList.clear();
        _asExpected = true;
        _isSpecialContext = false;
        _isLateForDoctype = false;
        _namespacePrefixes.clear();
        // Placing the cursor at the end of the (empty) window forces the first fill.
        _pos = WORKING_BUFFER_SIZE;
        try {
            readIfNeeded(0);
            boolean specialContentEmpty = true;
            while (!isAllRead()) {
                if (Thread.currentThread().isInterrupted()) {
                    handleInterruption();
                    _tokenList.clear();
                    _namespacePrefixes.clear();
                    return;
                }
                _saved.setLength(0);
                _currentTagToken = null;
                _asExpected = true;
                readIfNeeded(10);
                if (_isSpecialContext) {
                    specialContentEmpty = tokenizeInSpecialContext(specialContentEmpty);
                } else {
                    tokenizeMarkup();
                }
            }
        } finally {
            _reader.close();
        }
    }

    /**
     * Consumes one token while inside a special-context tag. Only the matching end tag,
     * a comment (while no non-blank content has been seen yet) and CDATA are recognized;
     * everything else is content.
     *
     * @param contentEmpty whether no non-blank content token has been produced yet in
     *        this special context
     * @return the updated flag; always true once the special context has been left
     */
    private boolean tokenizeInSpecialContext(boolean contentEmpty) throws IOException {
        int nameLength = _isSpecialContextName.length();
        if (startsWith("</" + _isSpecialContextName)
                && (isWhitespace(_pos + nameLength + 2) || isChar(_pos + nameLength + 2, '>'))) {
            tagEnd();
        } else if (contentEmpty && startsWith("<!--")) {
            comment();
        } else if (startsWithCDataStart()) {
            cdata();
        } else {
            boolean tokenAdded = content();
            if (contentEmpty && tokenAdded) {
                BaseToken lastToken = _tokenList.get(_tokenList.size() - 1);
                String text = lastToken == null ? null : lastToken.toString();
                if (text != null && text.trim().length() > 0) {
                    contentEmpty = false;
                }
            }
        }
        return !_isSpecialContext || contentEmpty;
    }

    /** Consumes one token outside of any special context. */
    private void tokenizeMarkup() throws IOException {
        if (startsWith("<!doctype")) {
            if (_isLateForDoctype) {
                ignoreUntil('<');
            } else {
                doctype();
                _isLateForDoctype = true;
            }
        } else if (startsWith("</") && isElementIdentifierStartChar(_pos + 2)) {
            _isLateForDoctype = true;
            tagEnd();
        } else if (startsWithCDataStart()) {
            cdata();
        } else if (startsWith("<!--")) {
            comment();
        } else if (startsWith("<") && isElementIdentifierStartChar(_pos + 1)) {
            _isLateForDoctype = true;
            tagStart();
        } else if (props.isIgnoreQuestAndExclam() && (startsWith("<!") || startsWith("<?"))) {
            // Skip the declaration itself, up to and including its closing '>'.
            // (Scanning for '<' here made the check below unreachable and silently
            // dropped all text up to the next tag.)
            ignoreUntil('>');
            if (isChar('>')) {
                go();
            }
        } else if (startsWith("<?xml")) {
            ignoreUntil('<');
        } else {
            content();
        }
    }

    private boolean isReservedTag(String string) {
        return "html".equalsIgnoreCase(string) || "head".equalsIgnoreCase(string) || "body".equalsIgnoreCase(string);
    }

    /**
     * Tells whether a tag with the given (non-null) name must be emitted as plain content:
     * either it is unknown to the cleaner and the properties ask for unknown tags to be
     * treated as content (reserved tags and namespace-aware mode excluded), or it is
     * deprecated and the properties ask for deprecated tags to be treated as content.
     */
    private boolean isTagTreatedAsContent(String tagName) {
        TagInfo tagInfo = cleaner.getTagInfo(tagName, cleanTimeValues);
        if (tagInfo == null) {
            return !props.isOmitUnknownTags()
                    && props.isTreatUnknownTagsAsContent()
                    && !isReservedTag(tagName)
                    && !props.isNamespacesAware();
        }
        return tagInfo.isDeprecated()
                && !props.isOmitDeprecatedTags()
                && props.isTreatDeprecatedTagsAsContent();
    }

    /**
     * Parses a start tag at the cursor (which is on {@code '<'}), including its attributes,
     * and emits a {@link TagNode}; a self-closing tag additionally emits an
     * {@link EndTagToken}. Enters the special context when the properties request CDATA
     * handling for the tag.
     */
    private void tagStart() throws IOException {
        saveCurrent();
        go();
        if (isAllRead()) {
            return;
        }
        String originalName = identifier(false);
        String tagName = transformations.getTagName(originalName);
        if (tagName != null && isTagTreatedAsContent(tagName)) {
            content();
            return;
        }
        TagNode tagNode = new TagNode(tagName);
        tagNode.setTrimAttributeValues(props.isTrimAttributeValues());
        _currentTagToken = tagNode;
        if (!_asExpected) {
            addSavedAsContent();
            return;
        }
        skipWhitespaces();
        tagAttributes();
        if (tagName != null) {
            if (transformations != null) {
                boolean ignoreCase = props.isIgnoreAttributesCase();
                tagNode.setAttributes(
                        transformations.transformAttributes(originalName, tagNode.getAttributesInLowerCase(ignoreCase)));
            }
            addToken(_currentTagToken);
        }
        if (isChar('>')) {
            go();
            if (props.isUseCdataFor(tagName)) {
                _isSpecialContext = true;
                _isSpecialContextName = tagName;
            }
        } else if (startsWith("/>")) {
            go(2);
            addToken(new EndTagToken(tagName));
        }
        _currentTagToken = null;
    }

    /**
     * Parses an end tag at the cursor (which is on {@code "</"}) and emits an
     * {@link EndTagToken}. Leaves the special context when the properties request CDATA
     * handling for the tag.
     */
    private void tagEnd() throws IOException {
        saveCurrent(2);
        go(2);
        // NOTE(review): saveCurrent(2) has already advanced the column for both chars, so
        // this looks like double counting. Kept as is because reported token positions
        // would change - confirm intent before removing.
        _col += 2;
        if (isAllRead()) {
            return;
        }
        String tagName = identifier(false);
        // The transformation is used directly instead of being stored in a local, so no
        // assumption about its declared type is needed here.
        // NOTE(review): assumes getTransformation is a side-effect-free lookup - confirm.
        if (transformations != null
                && transformations.hasTransformationForTag(tagName)
                && transformations.getTransformation(tagName) != null) {
            tagName = transformations.getTransformation(tagName).getDestTag();
        }
        if (tagName != null && isTagTreatedAsContent(tagName)) {
            content();
            return;
        }
        _currentTagToken = new EndTagToken(tagName);
        if (!_asExpected) {
            addSavedAsContent();
            return;
        }
        skipWhitespaces();
        tagAttributes();
        if (tagName != null) {
            addToken(_currentTagToken);
        }
        if (isChar('>')) {
            go();
        }
        if (props.isUseCdataFor(tagName)) {
            _isSpecialContext = false;
            _isSpecialContextName = tagName;
        }
        if (tagName != null && tagName.equalsIgnoreCase("html")) {
            skipWhitespaces();
        }
        _currentTagToken = null;
    }

    /**
     * Reads an identifier at the cursor and steps over it.
     *
     * <p>If the identifier contains {@code ':'}, only the part before the first colon
     * (prefix) and the part between the first and second colon (local name) are kept.
     * In namespace-aware mode the result is {@code prefix:local} and the lower-cased
     * prefix is recorded (unless it is {@code xmlns}); otherwise the result is the
     * local name alone.
     *
     * @param attribute true to accept attribute-name chars, false for element-name chars
     * @return the identifier, or null if none could be read; {@link #_asExpected} is set
     *         to false when the cursor is not on an identifier start char
     */
    private String identifier(boolean attribute) throws IOException {
        _asExpected = true;
        if (!isHtmlAttributeIdentifierStartChar()) {
            _asExpected = false;
            return null;
        }
        StringBuilder name = new StringBuilder();
        while (!isAllRead() && (attribute ? isHtmlAttributeIdentifierChar() : isHtmlElementIdentifier())) {
            saveCurrent();
            name.append(_working[_pos]);
            go();
        }
        if (name.length() == 0) {
            return null;
        }
        String id = name.toString();
        int colon = id.indexOf(':');
        if (colon >= 0) {
            String prefix = id.substring(0, colon);
            String local = id.substring(colon + 1);
            int nextColon = local.indexOf(':');
            if (nextColon >= 0) {
                local = local.substring(0, nextColon);
            }
            if (props.isNamespacesAware()) {
                id = prefix + ":" + local;
                if (!"xmlns".equalsIgnoreCase(prefix)) {
                    _namespacePrefixes.add(prefix.toLowerCase());
                }
            } else {
                id = local;
            }
        }
        return id;
    }

    /**
     * Reads attributes up to (not including) the closing {@code '>'} or {@code "/>"} and
     * adds them to the current tag token. An attribute without {@code '='} gets a value
     * chosen by {@code props.getBooleanAttributeValues()}: {@code ""} for "empty",
     * {@code "true"} for "true", otherwise its own name.
     */
    private void tagAttributes() throws IOException {
        while (!isAllRead() && _asExpected && !isChar('>') && !startsWith("/>")) {
            if (Thread.currentThread().isInterrupted()) {
                handleInterruption();
                return;
            }
            skipWhitespaces();
            String name = identifier(true);
            if (!_asExpected) {
                // Not an attribute name: step over the offending char unless it delimits a tag.
                if (!(isChar('<') || isChar('>') || startsWith("/>"))) {
                    saveCurrent();
                    go();
                }
                // A '<' leaves _asExpected false, which terminates the loop.
                if (!isChar('<')) {
                    _asExpected = true;
                }
                continue;
            }
            skipWhitespaces();
            String value;
            if (isChar('=')) {
                saveCurrent();
                go();
                value = attributeValue();
            } else {
                String mode = props.getBooleanAttributeValues();
                if ("empty".equals(mode)) {
                    value = "";
                } else if ("true".equals(mode)) {
                    value = "true";
                } else {
                    value = name;
                }
            }
            if (_asExpected) {
                _currentTagToken.addAttribute(name, value);
            }
        }
    }

    /**
     * Reads an attribute value at the cursor, which may be single-quoted, double-quoted
     * or unquoted, and steps over it including a closing quote.
     *
     * @return the value (entity-deserialized when the properties ask for it); {@code ""}
     *         if the cursor is on {@code '<'}, {@code '>'} or {@code "/>"}
     */
    private String attributeValue() throws IOException {
        skipWhitespaces();
        if (isChar('<') || isChar('>') || startsWith("/>")) {
            return "";
        }
        boolean singleQuoted = false;
        boolean doubleQuoted = false;
        StringBuilder value = new StringBuilder();
        if (isChar('\'')) {
            singleQuoted = true;
            saveCurrent();
            go();
        } else if (isChar('"')) {
            doubleQuoted = true;
            saveCurrent();
            go();
        }
        boolean multiWord = props.isAllowMultiWordAttributes();
        boolean htmlInside = props.isAllowHtmlInsideAttributes();
        while (!isAttributeValueEnd(singleQuoted, doubleQuoted, multiWord, htmlInside)) {
            value.append(_working[_pos]);
            saveCurrent();
            go();
        }
        if (singleQuoted && isChar('\'') || doubleQuoted && isChar('"')) {
            saveCurrent();
            go();
        }
        if (props.isDeserializeEntities()) {
            return Utils.deserializeEntities(value.toString(), props.isRecognizeUnicodeChars());
        }
        return value.toString();
    }

    /**
     * Tells whether the attribute value being read ends at the cursor.
     *
     * <p>A quoted value ends at its own quote char, at {@code '<'}/{@code '>'} unless HTML
     * is allowed inside attributes, or at whitespace unless multi-word values are allowed.
     * An unquoted value ends at whitespace, {@code '<'} or {@code '>'}. Any value ends at
     * the end of the input.
     */
    private boolean isAttributeValueEnd(boolean singleQuoted, boolean doubleQuoted,
            boolean multiWord, boolean htmlInside) {
        if (isAllRead()) {
            return true;
        }
        boolean angleBracket = isChar('>') || isChar('<');
        boolean whitespace = isWhitespace();
        if (singleQuoted) {
            return isChar('\'') || !htmlInside && angleBracket || !multiWord && whitespace;
        }
        if (doubleQuoted) {
            return isChar('"') || !htmlInside && angleBracket || !multiWord && whitespace;
        }
        return whitespace || angleBracket;
    }

    /**
     * Consumes at least one char, then everything up to the next CDATA opener or tag-like
     * construct, and emits the collected text as content.
     *
     * @return true if a content token was added
     */
    private boolean content() throws IOException {
        while (!isAllRead()) {
            saveCurrent();
            go();
            if (startsWithCDataStart() || isTagStartOrEnd()) {
                break;
            }
        }
        return addSavedAsContent();
    }

    /**
     * Tells whether the cursor is on {@code "</"}, {@code "<!"}, {@code "<?"} or on
     * {@code '<'} followed by an identifier start char.
     */
    private boolean isTagStartOrEnd() throws IOException {
        return startsWith("</")
                || startsWith("<!")
                || startsWith("<?")
                || startsWith("<") && isElementIdentifierStartChar(_pos + 1);
    }

    /**
     * Advances at least one char and keeps advancing until the cursor is on {@code ch}
     * (compared case-insensitively) or the input is exhausted. Skipped chars are not
     * saved, but the row/column counters are updated for every char stepped onto.
     */
    private void ignoreUntil(char ch) throws IOException {
        while (!isAllRead()) {
            go();
            updateCoordinates(_working[_pos]);
            if (isChar(ch)) {
                break;
            }
        }
    }

    /**
     * Parses a comment at the cursor (which is on {@code "<!--"}). Empty comments written
     * as {@code <!-->} or {@code <!--->} are skipped. Otherwise the text up to
     * {@code "-->"} is emitted as a {@link CommentNode} unless comments are omitted; every
     * {@code "--"} and a leading/trailing {@code '-'} are replaced using the configured
     * hyphen replacement.
     */
    private void comment() throws IOException {
        go(4);
        if (startsWith(">")) {
            go();
            return;
        }
        if (startsWith("->")) {
            go(2);
            return;
        }
        while (!isAllRead() && !startsWith("-->")) {
            saveCurrent();
            go();
        }
        if (startsWith("-->")) {
            go(3);
        }
        if (_saved.length() > 0) {
            if (!props.isOmitComments()) {
                String hyphenReplacement = props.getHyphenReplacementInComment();
                // Literal replacement: String.replaceAll would interpret '$' and '\' in the
                // user-configurable replacement as regex group references/escapes.
                String text = _saved.toString().replace("--", hyphenReplacement + hyphenReplacement);
                if (text.length() > 0 && text.charAt(0) == '-') {
                    text = hyphenReplacement + text.substring(1);
                }
                int length = text.length();
                if (length > 0 && text.charAt(length - 1) == '-') {
                    text = text.substring(0, length - 1) + hyphenReplacement;
                }
                addToken(new CommentNode(text));
            }
            _saved.setLength(0);
        }
    }

    /**
     * Parses a CDATA section at the cursor (which is on one of the three CDATA openers).
     *
     * <p>Outside a special context, with {@code isOmitCdataOutsideScriptAndStyle()} false,
     * the section is handed to {@link #content()} instead. Otherwise the opener is skipped
     * and, if a terminator is found ahead, the enclosed text is emitted as a {@link CData}
     * token when inside a special context and dropped otherwise.
     */
    private void cdata() throws IOException {
        if (!_isSpecialContext && !props.isOmitCdataOutsideScriptAndStyle()) {
            content();
            return;
        }
        if (startsWith(SAFE_CDATA_START)) {
            go(SAFE_CDATA_START.length());
        } else if (startsWith(SAFE_CDATA_START_ALT)) {
            go(SAFE_CDATA_START_ALT.length());
        } else {
            go(CDATA_START.length());
        }
        int cdataStart = _saved.length();
        if (!containsEndCData()) {
            // Nothing has been saved since cdataStart was taken, so this is go(0): only
            // the opener is dropped and the main loop continues right after it.
            go(cdataStart - _saved.length());
            return;
        }
        while (!(isAllRead() || startsWithCDataEnd())) {
            saveCurrent();
            go();
        }
        if (startsWith(SAFE_CDATA_END)) {
            go(SAFE_CDATA_END.length());
        } else if (startsWith(SAFE_CDATA_END_ALT)) {
            go(SAFE_CDATA_END_ALT.length());
        } else if (startsWith(CDATA_END)) {
            go(CDATA_END.length());
        } else {
            // Input ended without a terminator: step back by the number of chars saved
            // (go() clamps the cursor at 0).
            go(cdataStart - _saved.length());
            return;
        }
        if (_saved.length() > 0 && (_isSpecialContext || !props.isOmitCdataOutsideScriptAndStyle())) {
            addToken(new CData(_saved.substring(cdataStart)));
        }
        _saved.delete(cdataStart, _saved.length());
    }

    /**
     * Parses a doctype declaration at the cursor (which is on {@code "<!doctype"}): two
     * identifiers followed by up to three values, then skips to the next {@code '<'}.
     * The result is available from {@link #getDocType()}.
     */
    private void doctype() throws IOException {
        go(9); // length of "<!doctype"
        skipWhitespaces();
        String part1 = identifier(false);
        skipWhitespaces();
        String part2 = identifier(false);
        skipWhitespaces();
        String part3 = attributeValue();
        skipWhitespaces();
        String part4 = attributeValue();
        skipWhitespaces();
        String part5 = attributeValue();
        ignoreUntil('<');
        if (part5 == null || part5.length() == 0) {
            _docType = new DoctypeToken(part1, part2, part3, part4);
        } else {
            _docType = new DoctypeToken(part1, part2, part3, part4, part5);
        }
    }

    /** Returns the doctype found by the last run, or null if none was parsed. */
    public DoctypeToken getDocType() {
        return _docType;
    }

    /** Hook called when the tokenizing thread is found interrupted; currently does nothing. */
    private void handleInterruption() {
    }

    /**
     * Tells whether a CDATA terminator occurs at or after the cursor, looking first at the
     * unread part of the window and then, via mark/reset, at up to
     * {@link #CDATA_LOOKAHEAD_LIMIT} further chars of the reader.
     *
     * <p>All three accepted terminators ({@code "]]>"}, {@code "/*]]>*}{@code /"},
     * {@code "//]]>"}) contain {@code "]]>"}, so a single linear scan for that sequence is
     * sufficient. The scan state is carried from the window into the look-ahead so a
     * terminator straddling the two is found as well.
     *
     * @return true if a terminator is found; false if not, or if more input would have to
     *         be inspected but the reader does not support mark/reset
     * @throws IOException if reading from, marking or resetting the reader fails
     */
    private boolean containsEndCData() throws IOException {
        int limit = _len >= 0 ? _len : WORKING_BUFFER_SIZE;
        // Number of consecutive ']' seen immediately before the char being examined.
        int closingBrackets = 0;
        for (int i = Math.max(_pos, 0); i < limit; ++i) {
            char ch = _working[i];
            if (ch == '>' && closingBrackets >= 2) {
                return true;
            }
            closingBrackets = ch == ']' ? closingBrackets + 1 : 0;
        }
        if (_len >= 0) {
            // The reader is exhausted; the window held everything that was left.
            return false;
        }
        if (!_reader.markSupported()) {
            return false;
        }
        _reader.mark(CDATA_LOOKAHEAD_LIMIT);
        boolean found = false;
        for (int i = 0; i < CDATA_LOOKAHEAD_LIMIT && !found; ++i) {
            int ch = _reader.read();
            if (ch == -1) {
                break;
            }
            if (ch == '>' && closingBrackets >= 2) {
                found = true;
            } else {
                closingBrackets = ch == ']' ? closingBrackets + 1 : 0;
            }
        }
        _reader.reset();
        return found;
    }
}

