/*
 * Decompiled with CFR 0.152.
 */
package ro.sync.exml.indexer.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.StringTokenizer;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import ro.sync.exml.indexer.ElementIndexer;
import ro.sync.exml.indexer.StopWordsPreferences;
import ro.sync.exml.indexer.keywords.IKeywordsRepository;
import ro.sync.exml.indexer.parser.ContentHandlerComposite;
import ro.sync.exml.indexer.parser.ElementClassPair;
import ro.sync.exml.indexer.parser.KeywordsCollector;
import ro.sync.exml.indexer.parser.ShortDescriptorExtractor;
import ro.sync.exml.indexer.parser.TitleExtractor;
import ro.sync.exml.indexer.parser.TopicTocIDExtractor;
import ro.sync.exml.indexer.score.ScoreRepository;
import ro.sync.exml.indexer.utils.IndexedFileInfo;

/**
 * Parses one documentation file with SAX and dispatches the parsing events to a
 * set of content handlers: the {@link ElementIndexer}, the {@link KeywordsCollector}
 * and the title, short-description and topic-TOC-id extractors.
 *
 * <p>Before parsing, the file content is pre-processed by
 * {@link #removeValidationPI(File)} so that the XML declaration and the
 * {@code <!DOCTYPE html PUBLIC ...>} declaration are removed and {@code &nbsp;}
 * is replaced with a numeric character reference.
 *
 * <p>The descriptor of the file currently being parsed is kept in the
 * {@link #fileDesc} field. NOTE(review): because of that per-file state an instance
 * is presumably not meant to be used from several threads at once - confirm.
 */
public class SaxDocFileParser {
    private static final Logger logger = LoggerFactory.getLogger(SaxDocFileParser.class);

    /** Substring that marks a line as containing the HTML DOCTYPE declaration to strip. */
    private static final String HTML_DOCTYPE_MARKER = "<!DOCTYPE html PUBLIC";
    /** Substring that marks a line as containing an XML declaration. */
    private static final String XML_DECLARATION_MARKER = "<?xml version";
    /** Regular expression matching an XML declaration ({@code <?xml ... ?>}). */
    private static final String XML_DECLARATION_REGEX = "<\\?xml[^>]*\\?>";
    /** Regular expression matching a DOCTYPE declaration up to its first {@code >}. */
    private static final String DOCTYPE_REGEX = "<!DOCTYPE[^>]*>";

    /** Descriptor of the file handled by the latest {@link #parseDocument(File, String)} call. */
    protected IndexedFileInfo fileDesc = null;
    protected ScoreRepository scoreRepository;
    /** The composite handler created for the latest parse. */
    private ContentHandlerComposite dispatcher;
    private final ElementIndexer elementIndexer;
    private final KeywordsCollector keywordsCollector;
    private final IKeywordsRepository keywordsRepository;
    /**
     * Element/class pairs handed to the {@link ShortDescriptorExtractor}.
     * Kept as {@code ArrayList} because it is passed as-is to
     * {@code ShortDescriptorExtractor.setElementsNotToIndex}.
     */
    private final ArrayList<ElementClassPair> elemClassPairs = new ArrayList<>();

    /**
     * Creates a parser that uses a default-constructed {@link StopWordsPreferences}.
     *
     * @param scoreRepository    repository notified when the indexing of a file starts;
     *                           also passed to the element indexer
     * @param languageCode       language code passed to the element indexer
     * @param userDictionary     user dictionary passed to the element indexer
     * @param stem               stemming flag passed to the element indexer
     * @param keywordsRepository repository passed to the keywords collector and the title extractor
     */
    public SaxDocFileParser(ScoreRepository scoreRepository, String languageCode, String userDictionary, boolean stem, IKeywordsRepository keywordsRepository) {
        this(scoreRepository, languageCode, userDictionary, stem, keywordsRepository, new StopWordsPreferences());
    }

    /**
     * Creates a parser.
     *
     * @param scoreRepository      repository notified when the indexing of a file starts;
     *                             also passed to the element indexer
     * @param languageCode         language code passed to the element indexer
     * @param userDictionary       user dictionary passed to the element indexer
     * @param stem                 stemming flag passed to the element indexer
     * @param keywordsRepository   repository passed to the keywords collector and the title extractor
     * @param stopWordsPreferences stop-words preferences passed to the element indexer
     */
    public SaxDocFileParser(ScoreRepository scoreRepository, String languageCode, String userDictionary, boolean stem, IKeywordsRepository keywordsRepository, StopWordsPreferences stopWordsPreferences) {
        this.scoreRepository = scoreRepository;
        this.keywordsRepository = keywordsRepository;
        this.elementIndexer = new ElementIndexer(scoreRepository, languageCode, userDictionary, stem, stopWordsPreferences);
        this.keywordsCollector = new KeywordsCollector(keywordsRepository);
    }

    /**
     * Builds a fresh composite handler for the file described by {@link #fileDesc}.
     * The element indexer and the keywords collector are reused between files, the
     * extractors are created anew because they are bound to the current descriptor.
     *
     * @return the newly created composite, also stored in {@link #dispatcher}
     */
    private ContentHandlerComposite createDispatcher() {
        this.dispatcher = new ContentHandlerComposite();
        this.dispatcher.addContentHandler(this.elementIndexer);
        this.dispatcher.addContentHandler(this.keywordsCollector);
        this.dispatcher.addContentHandler(new TitleExtractor(this.fileDesc, this.keywordsRepository));
        ShortDescriptorExtractor shortDescriptorExtractor = new ShortDescriptorExtractor(this.fileDesc);
        shortDescriptorExtractor.setElementsNotToIndex(this.elemClassPairs);
        this.dispatcher.addContentHandler(shortDescriptorExtractor);
        this.dispatcher.addContentHandler(new TopicTocIDExtractor(this.fileDesc));
        return this.dispatcher;
    }

    /**
     * Parses the given file and feeds its content to the registered handlers.
     *
     * <p>Parsing problems are logged and never propagated: the descriptor created
     * for the file is returned even when the file could not be read or parsed.
     *
     * @param file    the file to parse
     * @param relPath the relative path stored in the returned descriptor
     * @return the descriptor created for {@code file} and {@code relPath}
     */
    public IndexedFileInfo parseDocument(File file, String relPath) {
        this.fileDesc = new IndexedFileInfo(file, relPath);
        this.scoreRepository.startIndexingFile(file);
        this.elementIndexer.startIndexingFile();
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setValidating(false);
        try {
            spf.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
            SAXParser saxParser = spf.newSAXParser();
            // Do not resolve external entities and do not fetch external DTDs.
            saxParser.getXMLReader().setFeature("http://xml.org/sax/features/external-general-entities", false);
            saxParser.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            String content = this.removeValidationPI(file);
            if (content != null) {
                InputSource is = new InputSource(new StringReader(content));
                // The content is parsed from memory; the system id keeps the link to the file.
                is.setSystemId(file.toURI().toURL().toString());
                saxParser.parse(is, (DefaultHandler)this.createDispatcher());
            }
        }
        catch (SAXParseException spe) {
            // Keep the cause: it carries the position and the reason of the syntax error.
            logger.warn("SaxParseException: indexed file contains incorrect xml syntax: " + file.getPath(), spe);
        }
        catch (SAXException se) {
            // Keep the cause: the Xerces hint below is only one of the possible reasons.
            logger.warn("SaxException. You may need to include Xerces in your classpath. See documentation for details.", se);
        }
        catch (ParserConfigurationException | IOException e) {
            logger.warn(e.getMessage(), e);
        }
        return this.fileDesc;
    }

    /**
     * Reads the file as UTF-8 and returns its content prepared for a non-validating parse:
     * <ul>
     *   <li>{@code &nbsp;} is replaced with {@code &#160;};</li>
     *   <li>the XML declaration is replaced with a line break;</li>
     *   <li>a {@code <!DOCTYPE html PUBLIC ...>} declaration, possibly spread over several
     *       lines, is replaced with a line break.</li>
     * </ul>
     * Every emitted line is terminated with {@code \n}.
     *
     * @param file the file to read
     * @return the processed content; the content read so far when reading fails midway;
     *         {@code null} when the file cannot be opened or closing it fails
     */
    private String removeValidationPI(File file) {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    boolean hasDoctype = line.contains(HTML_DOCTYPE_MARKER);
                    if (hasDoctype) {
                        line = joinUntilDoctypeEnd(line, br);
                    }
                    // Done after joining so that continuation lines are covered as well.
                    line = line.replace("&nbsp;", "&#160;");
                    if (line.contains(XML_DECLARATION_MARKER)) {
                        line = line.replaceAll(XML_DECLARATION_REGEX, "\n");
                    }
                    if (hasDoctype) {
                        line = line.replaceAll(DOCTYPE_REGEX, "\n");
                    }
                    // Always terminate the line, otherwise text following the DOCTYPE
                    // would be glued to the beginning of the next line.
                    sb.append(line).append('\n');
                }
            }
            catch (IOException e) {
                // Best effort: keep what was read before the failure.
                logger.error(e.getMessage(), e);
            }
        }
        catch (IOException e) {
            logger.error(e.getMessage(), e);
            return null;
        }
        return sb.toString();
    }

    /**
     * Appends the following lines of the reader to {@code firstLine} until the
     * {@code >} closing the DOCTYPE declaration is found or the input ends.
     *
     * @param firstLine a line containing {@code <!DOCTYPE}
     * @param reader    the reader positioned right after {@code firstLine}
     * @return the joined text (lines are joined without separator)
     * @throws IOException if reading a continuation line fails
     */
    private static String joinUntilDoctypeEnd(String firstLine, BufferedReader reader) throws IOException {
        int doctypeStart = firstLine.indexOf("<!DOCTYPE");
        StringBuilder joined = new StringBuilder(firstLine);
        while (joined.indexOf(">", doctypeStart) < 0) {
            String next = reader.readLine();
            if (next == null) {
                // Unterminated DOCTYPE at end of file: stop instead of failing on null.
                break;
            }
            joined.append(next);
        }
        return joined.toString();
    }

    /**
     * Replaces the list of element/class pairs handed to the short description extractor.
     *
     * <p>The argument is a comma separated list of tokens. Each token is either an
     * element name or {@code elementName.className}; the text before the first dot is
     * the element name, the rest is the class. Both parts are trimmed.
     *
     * @param doNotIndexElements the comma separated list; {@code null} only clears the
     *                           current list
     */
    public void setElementsNotToIndex(String doNotIndexElements) {
        this.elemClassPairs.clear();
        if (doNotIndexElements == null) {
            return;
        }
        StringTokenizer tokenizer = new StringTokenizer(doNotIndexElements, ",");
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            int dotIndex = token.indexOf('.');
            String elemName = dotIndex == -1 ? token : token.substring(0, dotIndex);
            String elemClass = dotIndex == -1 ? "" : token.substring(dotIndex + 1);
            this.elemClassPairs.add(new ElementClassPair(elemName.trim(), elemClass.trim()));
        }
    }
}

