/*
 * Decompiled with CFR 0.152.
 */
package ro.sync.textsearch.html;

import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.IndexableField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import ro.sync.basic.io.RewindableInputStream;
import ro.sync.textsearch.DocumentCreator;
import ro.sync.textsearch.fields.StoredTextField;
import ro.sync.textsearch.html.BufferedEmbedder;
import ro.sync.textsearch.html.HTMLDocumentCreator;
import ro.sync.textsearch.html.LuceneDocumentBuilder;
import ro.sync.textsearch.webhelp.converter.IContentConverter;
import ro.sync.textsearch.webhelp.embeddings.IEmbeddingProvider;
import ro.sync.textsearch.xhtml.LightXHTMLMetaShredder;
import ro.sync.textsearch.xhtml.LightXHTMLShredder;
import ro.sync.textsearch.xhtml.XHTMLBreadcrumbShredder;
import ro.sync.textsearch.xhtml.XHTMLLabelShredder;
import ro.sync.textsearch.xml.CompoundDefaultHandler;
import ro.sync.textsearch.xml.XMLOptions;

/**
 * Document creator that runs the input through an XML reader wired to the "light" XHTML
 * shredders and turns the collected fields into Lucene documents.
 *
 * <p>Every successful call yields one document tagged with {@code __document_type__ = keywords}
 * that carries the fields gathered by the content, meta, breadcrumb and label shredders. When a
 * content converter and an embedding provider with semantic search enabled are both supplied,
 * the converted content is stored on that document as {@code markdown_content} and the documents
 * derived from it are pushed through a {@link BufferedEmbedder}.
 */
public class LightXHTMLDocumentCreator extends HTMLDocumentCreator {
    private static final Logger logger = LoggerFactory.getLogger(LightXHTMLDocumentCreator.class);

    /** Facets configuration updated with every facet found while parsing; may be {@code null}. */
    private final FacetsConfig facetsConfig;

    /**
     * Identifier handed to the {@link BufferedEmbedder} created for semantic indexing.
     * NOTE(review): its exact meaning is defined by BufferedEmbedder, not visible here.
     */
    protected static final String CHATBOT_INDEXING_UUID = "5046f1ce-3db7-458e-b6bc-e49e8dc675bb";

    /**
     * Creates the document creator.
     *
     * @param xmlOptions   options forwarded to the {@link HTMLDocumentCreator} constructor, together
     *                     with the fixed arguments {@code false, false, -1}
     * @param facetsConfig facets configuration to update while indexing, or {@code null} to skip
     *                     facet registration
     */
    public LightXHTMLDocumentCreator(XMLOptions xmlOptions, FacetsConfig facetsConfig) {
        super(xmlOptions, false, false, -1);
        this.facetsConfig = facetsConfig;
    }

    /**
     * Delegates to the five-argument overload; {@code generateCompletionFields} is ignored by
     * this implementation.
     */
    @Override
    public Document[] createDocuments(InputSource source, long contentLenght, List<String> tagsAndClassesToIgnore, IContentConverter contentConverter, IEmbeddingProvider embeddingsProvider, boolean generateCompletionFields) throws IOException, DocumentCreator.ParseException {
        return this.createDocuments(source, contentLenght, tagsAndClassesToIgnore, contentConverter, embeddingsProvider);
    }

    /**
     * Parses the given source and builds the Lucene documents for it.
     *
     * @param source                 input to index; it is first passed through {@code makeWellFormed}
     *                               and its byte stream is then read
     * @param contentLenght          not read by this implementation
     * @param tagsAndClassesToIgnore forwarded to {@code createXMLParser}
     * @param contentConverter       converter used for semantic indexing; semantic indexing is
     *                               skipped when {@code null}
     * @param embeddingsProvider     semantic indexing is skipped when this is {@code null} or reports
     *                               semantic search as disabled
     * @return the produced documents; the keywords document is always the last element
     * @throws IOException                    on I/O failure, or wrapping a
     *                                        {@link BufferedEmbedder.EmbeddingsTokensMaxLimitExceededException}
     *                                        or {@link URISyntaxException}
     * @throws DocumentCreator.ParseException wrapping any {@link SAXException} raised while parsing
     */
    public Document[] createDocuments(InputSource source, long contentLenght, List<String> tagsAndClassesToIgnore, IContentConverter contentConverter, IEmbeddingProvider embeddingsProvider) throws IOException, DocumentCreator.ParseException {
        source = this.makeWellFormed(source);
        List<Document> result = new ArrayList<>(1);
        try {
            // Wrap the stream so it can be read twice: once by the parser, once by the converter.
            RewindableInputStream ris = new RewindableInputStream(source.getByteStream());
            InputSource wfInputSource = new InputSource(source.getSystemId());
            wfInputSource.setByteStream(ris);

            LightXHTMLShredder shredder = new LightXHTMLShredder(source.getSystemId());
            LightXHTMLMetaShredder metaShredder = new LightXHTMLMetaShredder();
            CompoundDefaultHandler compoundHandler = new CompoundDefaultHandler(shredder, metaShredder);
            XHTMLBreadcrumbShredder breadcrumbShredder = new XHTMLBreadcrumbShredder();
            XHTMLLabelShredder labelShredder = new XHTMLLabelShredder();
            CompoundDefaultHandler compoundAdditionalHandler = new CompoundDefaultHandler(breadcrumbShredder, labelShredder);
            XMLReader parser2use = this.createXMLParser(tagsAndClassesToIgnore, false, shredder, compoundHandler, compoundAdditionalHandler);
            parser2use.parse(wfInputSource);

            // Gather everything the handlers collected into a single field list.
            List<Field> docFields = shredder.getDocumentFields();
            docFields.addAll(metaShredder.getDocumentFields());
            docFields.add(breadcrumbShredder.getDocumentField());
            docFields.addAll(labelShredder.getDocumentFields());

            if (this.facetsConfig != null) {
                // Every facet seen in this document is registered as hierarchical and multi-valued.
                Set<String> facets = metaShredder.getFacetsCollection();
                for (String facet : facets) {
                    this.facetsConfig.setHierarchical(facet, true);
                    this.facetsConfig.setMultiValued(facet, true);
                }
            }

            Document doc = new Document();
            docFields.forEach(doc::add);
            doc.add(new StringField("__document_type__", "keywords", Field.Store.YES));

            if (contentConverter != null && embeddingsProvider != null && embeddingsProvider.isSemanticSearchEnabled()) {
                // The parser above already read the stream; rewind before handing it to the converter.
                ris.rewind();
                String markdownContent = contentConverter.getConvertedContent(wfInputSource);
                logger.info("Markdown content was computed {}", markdownContent != null);
                // NOTE(review): markdownContent is used below without a null check even though the
                // log line above allows for null - confirm StoredTextField and LuceneDocumentBuilder
                // accept a null value.
                doc.add(new StoredTextField("markdown_content", markdownContent));
                List<Document> semanticDocuments = new LuceneDocumentBuilder(source.getSystemId(), markdownContent).buildDocsForFile();
                // Documents emitted through the callback during these calls land ahead of the
                // keywords document, which is appended afterwards.
                BufferedEmbedder buffer = new BufferedEmbedder(document -> result.add((Document) document), CHATBOT_INDEXING_UUID);
                buffer.addDocuments(semanticDocuments);
                buffer.flush();
            }
            result.add(doc);
        } catch (SAXException e) {
            logger.debug("Stopped by sax exception.", e);
            throw new DocumentCreator.ParseException(e);
        } catch (BufferedEmbedder.EmbeddingsTokensMaxLimitExceededException e) {
            logger.debug("Embeddings Tokens Max Limit Exceeded: {}", e.getMessage(), e);
            throw new IOException(e);
        } catch (URISyntaxException e) {
            logger.debug("Markdown content could not be computed: {}", e.getMessage(), e);
            throw new IOException(e);
        }
        return result.toArray(new Document[0]);
    }

    /**
     * Checked exception carrying a message about unrecognized content. It is declared here but
     * not thrown anywhere in this class.
     */
    protected static class ContentNotRecognizedException extends Exception {
        /**
         * @param message description of why the content was not recognized
         */
        public ContentNotRecognizedException(String message) {
            super(message);
        }
    }
}

