package org.apache.tika.parser.pdf;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.james.mime4j.dom.field.FieldName;
import org.apache.jempbox.xmp.ResourceEvent;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/tika-app-1.3.jar:org/apache/tika/parser/pdf/PDFParser.class
 */
/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.3.jar:org/apache/tika/parser/pdf/PDFParser.class */
/**
 * Parser for {@code application/pdf} documents built on PDFBox.
 *
 * <p>A parse run loads the document, attempts decryption when the document is
 * encrypted, copies the document information dictionary into the supplied
 * {@link Metadata}, hands text extraction to {@code PDF2XHTML.process}, and
 * finally passes every embedded file to an {@link EmbeddedDocumentExtractor}.
 */
public class PDFParser extends AbstractParser {
    private static final long serialVersionUID = -752276948656079347L;

    /**
     * Metadata key consulted for the decryption password when no
     * {@link PasswordProvider} in the {@link ParseContext} supplies one.
     */
    public static final String PASSWORD = "org.apache.tika.parser.pdf.password";

    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));

    /**
     * Information-dictionary keys that {@link #extractMetadata} maps explicitly;
     * they are skipped when the remaining (custom) entries are copied.
     * NOTE(review): two entries reference constants of unrelated classes
     * (MSOffice, FieldName) — looks like a decompiler artifact; confirm the
     * intended literals before replacing them.
     */
    private static final List<String> HANDLED_INFO_KEYS = Collections.unmodifiableList(
            Arrays.asList("Author", "Creator", "CreationDate", "ModDate", MSOffice.KEYWORDS, "Producer", FieldName.SUBJECT, "Title", "Trapped"));

    // Extraction flags forwarded unchanged to PDF2XHTML.process.
    private boolean suppressDuplicateOverlappingText;
    private boolean enableAutoSpace = true;
    private boolean extractAnnotationText = true;
    private boolean sortByPosition = false;

    /**
     * Returns the media types handled by this parser.
     *
     * @param parseContext unused
     * @return a singleton set containing {@code application/pdf}
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses a PDF document from the given stream.
     *
     * <p>The input stream is wrapped in a {@link CloseShieldInputStream} before
     * it is handed to PDFBox. The loaded document and any temporary file
     * created for it are always released, whether parsing succeeds or fails.
     *
     * @param inputStream    the PDF content
     * @param contentHandler receiver of the extracted content events
     * @param metadata       receives the content type and document metadata; also
     *                       consulted for the {@link #PASSWORD} entry
     * @param parseContext   may carry a {@link PasswordProvider} and an
     *                       {@link EmbeddedDocumentExtractor}
     * @throws IOException   if loading, reading or closing the document fails
     * @throws SAXException  if the content handler reports an error
     * @throws TikaException if extraction or temporary-resource cleanup fails
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        PDDocument pDDocument = null;
        TemporaryResources temporaryResources = new TemporaryResources();
        try {
            TikaInputStream cast = TikaInputStream.cast(inputStream);
            if (cast != null && cast.hasFile()) {
                // File-backed input: let PDFBox use a temporary scratch file.
                pDDocument = PDDocument.load(new CloseShieldInputStream(inputStream), new RandomAccessFile(temporaryResources.createTemporaryFile(), "rw"), true);
            } else {
                pDDocument = PDDocument.load((InputStream) new CloseShieldInputStream(inputStream), true);
            }
            if (pDDocument.isEncrypted()) {
                try {
                    pDDocument.decrypt(resolvePassword(metadata, parseContext));
                } catch (Exception ignored) {
                    // Deliberately best-effort: a failed decryption is not
                    // reported here and parsing continues with the document
                    // as loaded.
                }
            }
            metadata.set("Content-Type", "application/pdf");
            extractMetadata(pDDocument, metadata);
            PDF2XHTML.process(pDDocument, contentHandler, metadata, this.extractAnnotationText, this.enableAutoSpace, this.suppressDuplicateOverlappingText, this.sortByPosition);
            extractEmbeddedDocuments(parseContext, pDDocument, contentHandler);
        } finally {
            // Nested finally so the temporary resources are disposed even
            // when closing the document itself throws.
            try {
                if (pDDocument != null) {
                    pDDocument.close();
                }
            } finally {
                temporaryResources.dispose();
            }
        }
    }

    /**
     * Determines the password used for decryption: the {@link PasswordProvider}
     * from the context first, then the {@link #PASSWORD} metadata entry, and
     * the empty string when neither yields a value.
     */
    private String resolvePassword(Metadata metadata, ParseContext parseContext) {
        String password = null;
        PasswordProvider passwordProvider = (PasswordProvider) parseContext.get(PasswordProvider.class);
        if (passwordProvider != null) {
            password = passwordProvider.getPassword(metadata);
        }
        if (password == null) {
            password = metadata.get(PASSWORD);
        }
        return password != null ? password : "";
    }

    /**
     * Passes every file listed in the document's embedded-files name tree to
     * the {@link EmbeddedDocumentExtractor} found in the context, falling back
     * to a {@link ParsingEmbeddedDocumentExtractor} when none is registered.
     * Entries without a file specification or without an embedded file are
     * skipped.
     */
    private void extractEmbeddedDocuments(ParseContext parseContext, PDDocument pDDocument, ContentHandler contentHandler) throws IOException, SAXException, TikaException {
        PDDocumentNameDictionary names = pDDocument.getDocumentCatalog().getNames();
        if (names == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        EmbeddedDocumentExtractor embeddedDocumentExtractor = (EmbeddedDocumentExtractor) parseContext.get(EmbeddedDocumentExtractor.class);
        if (embeddedDocumentExtractor == null) {
            embeddedDocumentExtractor = new ParsingEmbeddedDocumentExtractor(parseContext);
        }
        Map<String, Object> embeddedFileNames = embeddedFiles.getNames();
        if (embeddedFileNames == null) {
            return;
        }
        for (Map.Entry<String, Object> entry : embeddedFileNames.entrySet()) {
            PDComplexFileSpecification specification = (PDComplexFileSpecification) entry.getValue();
            if (specification == null) {
                continue;
            }
            PDEmbeddedFile embeddedFile = specification.getEmbeddedFile();
            if (embeddedFile == null) {
                // Nothing to read for this entry; dereferencing it would
                // otherwise fail with a NullPointerException.
                continue;
            }
            Metadata embeddedMetadata = new Metadata();
            embeddedMetadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, entry.getKey());
            embeddedMetadata.set("Content-Type", embeddedFile.getSubtype());
            embeddedMetadata.set("Content-Length", Long.toString(embeddedFile.getSize()));
            if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
                TikaInputStream tikaInputStream = TikaInputStream.get(embeddedFile.createInputStream());
                try {
                    embeddedDocumentExtractor.parseEmbedded(tikaInputStream, new EmbeddedContentHandler(contentHandler), embeddedMetadata, false);
                } finally {
                    tikaInputStream.close();
                }
            }
        }
    }

    /**
     * Copies the page count and the document information dictionary into
     * {@code metadata}. The well-known entries are mapped to dedicated keys;
     * every other dictionary entry is added under its own name.
     */
    private void extractMetadata(PDDocument pDDocument, Metadata metadata) throws TikaException {
        PDDocumentInformation documentInformation = pDDocument.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, pDDocument.getNumberOfPages());
        addMetadata(metadata, TikaCoreProperties.TITLE, documentInformation.getTitle());
        addMetadata(metadata, TikaCoreProperties.CREATOR, documentInformation.getAuthor());
        addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, documentInformation.getCreator());
        addMetadata(metadata, TikaCoreProperties.KEYWORDS, documentInformation.getKeywords());
        addMetadata(metadata, "producer", documentInformation.getProducer());
        addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, documentInformation.getSubject());
        addMetadata(metadata, "trapped", documentInformation.getTrapped());
        try {
            Calendar creationDate = documentInformation.getCreationDate();
            // NOTE(review): ResourceEvent.ACTION_CREATED is a constant of an
            // unrelated class used as the metadata key — confirm the intended
            // literal.
            addMetadata(metadata, ResourceEvent.ACTION_CREATED, creationDate);
            addMetadata(metadata, TikaCoreProperties.CREATED, creationDate);
        } catch (IOException ignored) {
            // Best-effort: a creation date that cannot be read is left out.
        }
        try {
            Calendar modificationDate = documentInformation.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modificationDate);
            addMetadata(metadata, TikaCoreProperties.MODIFIED, modificationDate);
        } catch (IOException ignored) {
            // Best-effort: a modification date that cannot be read is left out.
        }
        // Copy the remaining entries that were not mapped explicitly above.
        for (COSName cOSName : documentInformation.getDictionary().keySet()) {
            String name = cOSName.getName();
            if (!HANDLED_INFO_KEYS.contains(name)) {
                addMetadata(metadata, name, documentInformation.getDictionary().getDictionaryObject(cOSName));
            }
        }
    }

    /** Adds {@code str} under {@code property} unless it is {@code null}. */
    private void addMetadata(Metadata metadata, Property property, String str) {
        if (str != null) {
            metadata.add(property, str);
        }
    }

    /** Adds {@code str2} under the key {@code str} unless it is {@code null}. */
    private void addMetadata(Metadata metadata, String str, String str2) {
        if (str2 != null) {
            metadata.add(str, str2);
        }
    }

    /**
     * Sets the key {@code str} to the {@code Date.toString()} form of the
     * calendar's time unless the calendar is {@code null}.
     */
    private void addMetadata(Metadata metadata, String str, Calendar calendar) {
        if (calendar != null) {
            metadata.set(str, calendar.getTime().toString());
        }
    }

    /** Sets {@code property} to the calendar's time unless the calendar is {@code null}. */
    private void addMetadata(Metadata metadata, Property property, Calendar calendar) {
        if (calendar != null) {
            metadata.set(property, calendar.getTime());
        }
    }

    /**
     * Adds a COS value under the key {@code str}: arrays are flattened
     * element by element, strings contribute their text, and any other object
     * contributes its {@code toString()} form. A {@code null} value is ignored.
     */
    private void addMetadata(Metadata metadata, String str, COSBase cOSBase) {
        if (cOSBase == null) {
            // Nothing to record; the fallback branch below would otherwise
            // dereference null.
            return;
        }
        if (cOSBase instanceof COSArray) {
            // Iterate as Object and cast so this does not depend on the
            // generic signature of toList().
            for (Object element : ((COSArray) cOSBase).toList()) {
                addMetadata(metadata, str, (COSBase) element);
            }
        } else if (cOSBase instanceof COSString) {
            addMetadata(metadata, str, ((COSString) cOSBase).getString());
        } else {
            addMetadata(metadata, str, cOSBase.toString());
        }
    }

    /**
     * Sets the {@code enableAutoSpace} flag forwarded to
     * {@code PDF2XHTML.process}; it is {@code true} by default.
     */
    public void setEnableAutoSpace(boolean z) {
        this.enableAutoSpace = z;
    }

    /** Returns the current {@code enableAutoSpace} flag. */
    public boolean getEnableAutoSpace() {
        return this.enableAutoSpace;
    }

    /**
     * Sets the {@code extractAnnotationText} flag forwarded to
     * {@code PDF2XHTML.process}; it is {@code true} by default.
     */
    public void setExtractAnnotationText(boolean z) {
        this.extractAnnotationText = z;
    }

    /** Returns the current {@code extractAnnotationText} flag. */
    public boolean getExtractAnnotationText() {
        return this.extractAnnotationText;
    }

    /**
     * Sets the {@code suppressDuplicateOverlappingText} flag forwarded to
     * {@code PDF2XHTML.process}; it is {@code false} by default.
     */
    public void setSuppressDuplicateOverlappingText(boolean z) {
        this.suppressDuplicateOverlappingText = z;
    }

    /** Returns the current {@code suppressDuplicateOverlappingText} flag. */
    public boolean getSuppressDuplicateOverlappingText() {
        return this.suppressDuplicateOverlappingText;
    }

    /**
     * Sets the {@code sortByPosition} flag forwarded to
     * {@code PDF2XHTML.process}; it is {@code false} by default.
     */
    public void setSortByPosition(boolean z) {
        this.sortByPosition = z;
    }

    /** Returns the current {@code sortByPosition} flag. */
    public boolean getSortByPosition() {
        return this.sortByPosition;
    }
}
