package org.apache.tika.parser.microsoft;

import com.orientechnologies.orient.core.serialization.serializer.stream.OStreamSerializerRID;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.CustomProperties;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.MarkUnsupportedException;
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.UnexpectedPropertySetTypeException;
import org.apache.poi.hslf.dev.PPTXMLDump;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/microsoft/OfficeParser.class */
/**
 * Parser for OLE2 (POIFS) based Microsoft Office documents.
 *
 * <p>The root directory of the POIFS container is scanned for well-known
 * entry names. Depending on which entry is found, the content type is set on
 * the metadata and the matching extractor (Word, PowerPoint, Excel, Visio or
 * Outlook) writes the text to the content handler as XHTML. Properties from
 * the summary information streams are copied into the metadata when those
 * streams exist.
 */
public class OfficeParser implements Parser {
    private static final String SUMMARY_INFORMATION = "\u0005SummaryInformation";
    private static final String DOCUMENT_SUMMARY_INFORMATION = "\u0005DocumentSummaryInformation";

    /** XHTML element name used for every extracted block of text. */
    private static final String PARAGRAPH = "p";

    /** Name prefix of the property streams that identify an Outlook message. */
    private static final String OUTLOOK_ENTRY_PREFIX = "__substg1.0_";

    /**
     * Extracts text and metadata from an OLE2 document.
     *
     * @param inputStream    the raw document stream, read through a {@link POIFSFileSystem}
     * @param contentHandler receives the extracted text as XHTML SAX events
     * @param metadata       receives the content type and summary properties
     * @param parseContext   consulted for a {@link Locale} (used for Excel);
     *                       {@link Locale#getDefault()} is used when none is present
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if a summary property stream is invalid
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();

        POIFSFileSystem filesystem = new POIFSFileSystem(inputStream);
        parseSummaryEntryIfExists(filesystem, SUMMARY_INFORMATION, metadata);
        parseSummaryEntryIfExists(filesystem, DOCUMENT_SUMMARY_INFORMATION, metadata);

        // An Outlook message holds many "__substg1.0_*" streams; extract only once.
        boolean outlookExtracted = false;
        Iterator<?> entries = filesystem.getRoot().getEntries();
        while (entries.hasNext()) {
            Entry entry = (Entry) entries.next();
            if (!(entry instanceof DocumentEntry)) {
                continue;
            }
            String name = entry.getName();
            if ("WordDocument".equals(name)) {
                setType(metadata, "application/msword");
                extractWord(filesystem, xhtml);
            } else if (PPTXMLDump.PPDOC_ENTRY.equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor = new PowerPointExtractor(filesystem);
                xhtml.element(PARAGRAPH, extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                Locale locale = (Locale) parseContext.get(Locale.class, Locale.getDefault());
                new ExcelExtractor().parse(filesystem, xhtml, locale);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                extractVisio(filesystem, xhtml);
            } else if (!outlookExtracted && name.startsWith(OUTLOOK_ENTRY_PREFIX)) {
                outlookExtracted = true;
                setType(metadata, "application/vnd.ms-outlook");
                new OutlookExtractor(filesystem).parse(xhtml, metadata);
            }
        }

        xhtml.endDocument();
    }

    /**
     * Convenience overload that parses with an empty {@link ParseContext}.
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        parse(inputStream, contentHandler, metadata, new ParseContext());
    }

    /**
     * Writes the Word document text: header, body paragraphs, footnotes,
     * comments, endnotes and footer, in that order.
     */
    private void extractWord(POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException {
        WordExtractor extractor = new WordExtractor(filesystem);
        addTextIfAny(xhtml, "header", extractor.getHeaderText());
        addParagraphs(xhtml, extractor.getParagraphText());
        addParagraphs(xhtml, extractor.getFootnoteText());
        addParagraphs(xhtml, extractor.getCommentsText());
        addParagraphs(xhtml, extractor.getEndnoteText());
        addTextIfAny(xhtml, "footer", extractor.getFooterText());
    }

    /** Writes every text chunk of the Visio document as its own paragraph. */
    private void extractVisio(POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException {
        addParagraphs(xhtml, new VisioTextExtractor(filesystem).getAllText());
    }

    /** Emits one paragraph element per array item. */
    private void addParagraphs(XHTMLContentHandler xhtml, String[] texts) throws SAXException {
        for (String text : texts) {
            xhtml.element(PARAGRAPH, text);
        }
    }

    /**
     * Reads the named property set stream from the root directory, if there
     * is one, and copies its properties into the metadata.
     *
     * @throws TikaException if the entry exists but is not a valid property set
     */
    private void parseSummaryEntryIfExists(POIFSFileSystem filesystem, String entryName, Metadata metadata) throws IOException, TikaException {
        try {
            Entry entry = filesystem.getRoot().getEntry(entryName);
            if (!(entry instanceof DocumentEntry)) {
                // A directory under this name cannot hold a property set;
                // treat it like a missing entry rather than failing on the cast.
                return;
            }
            PropertySet properties;
            DocumentInputStream stream = new DocumentInputStream((DocumentEntry) entry);
            try {
                properties = new PropertySet(stream);
            } finally {
                stream.close();
            }
            if (properties.isSummaryInformation()) {
                parse(new SummaryInformation(properties), metadata);
            }
            if (properties.isDocumentSummaryInformation()) {
                parse(new DocumentSummaryInformation(properties), metadata);
            }
        } catch (FileNotFoundException ignored) {
            // The summary streams are optional; a document without them
            // simply contributes no summary metadata.
        } catch (MarkUnsupportedException e) {
            throw new TikaException("Invalid DocumentInputStream", e);
        } catch (NoPropertySetStreamException e) {
            throw new TikaException("Not a HPSF document", e);
        } catch (UnexpectedPropertySetTypeException e) {
            throw new TikaException("Unexpected HPSF document", e);
        }
    }

    /** Copies the summary information properties into the metadata. */
    private void parse(SummaryInformation summary, Metadata metadata) {
        set(metadata, "title", summary.getTitle());
        set(metadata, MSOffice.AUTHOR, summary.getAuthor());
        set(metadata, MSOffice.KEYWORDS, summary.getKeywords());
        set(metadata, DublinCore.SUBJECT, summary.getSubject());
        set(metadata, MSOffice.LAST_AUTHOR, summary.getLastAuthor());
        set(metadata, MSOffice.COMMENTS, summary.getComments());
        set(metadata, MSOffice.TEMPLATE, summary.getTemplate());
        set(metadata, MSOffice.APPLICATION_NAME, summary.getApplicationName());
        set(metadata, MSOffice.REVISION_NUMBER, summary.getRevNumber());
        set(metadata, MSOffice.CREATION_DATE, summary.getCreateDateTime());
        set(metadata, MSOffice.CHARACTER_COUNT, summary.getCharCount());
        set(metadata, MSOffice.EDIT_TIME, summary.getEditTime());
        set(metadata, MSOffice.LAST_SAVED, summary.getLastSaveDateTime());
        set(metadata, MSOffice.PAGE_COUNT, summary.getPageCount());
        set(metadata, MSOffice.SECURITY, summary.getSecurity());
        set(metadata, MSOffice.WORD_COUNT, summary.getWordCount());
        set(metadata, MSOffice.LAST_PRINTED, summary.getLastPrinted());
    }

    /** Copies the document summary information properties into the metadata. */
    private void parse(DocumentSummaryInformation summary, Metadata metadata) {
        set(metadata, MSOffice.COMPANY, summary.getCompany());
        set(metadata, MSOffice.MANAGER, summary.getManager());
        set(metadata, "language", getLanguage(summary));
        set(metadata, MSOffice.CATEGORY, summary.getCategory());
    }

    /**
     * Returns the custom "Language" property, or {@code null} when there are
     * no custom properties or the value is not a string.
     */
    private String getLanguage(DocumentSummaryInformation summary) {
        CustomProperties customProperties = summary.getCustomProperties();
        if (customProperties == null) {
            return null;
        }
        Object language = customProperties.get("Language");
        return (language instanceof String) ? (String) language : null;
    }

    /** Sets the content type of the document. */
    private void setType(Metadata metadata, String type) {
        metadata.set("Content-Type", type);
    }

    /** Stores a string property; {@code null} values are skipped. */
    private void set(Metadata metadata, String name, String value) {
        if (value != null) {
            metadata.set(name, value);
        }
    }

    /** Stores a date property via {@link Date#toString()}; {@code null} values are skipped. */
    private void set(Metadata metadata, String name, Date value) {
        if (value != null) {
            metadata.set(name, value.toString());
        }
    }

    /** Stores a numeric property; zero and negative values are skipped. */
    private void set(Metadata metadata, String name, long value) {
        if (value > 0) {
            metadata.set(name, Long.toString(value));
        }
    }

    /**
     * Writes the text as a paragraph inside a {@code div} carrying the given
     * class attribute. Nothing is written for {@code null} or empty text.
     */
    private void addTextIfAny(XHTMLContentHandler xhtml, String section, String text) throws SAXException {
        if (text != null && text.length() > 0) {
            xhtml.startElement("div", "class", section);
            xhtml.element(PARAGRAPH, text);
            xhtml.endElement("div");
        }
    }
}
