Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

In the following, we detail which file types are handled explicitly by which parsers.  NOTE: Tika also applies parsers to subtypes of these mime types.  For example, application/json is a subtype of application/javascript, which is a subtype of text/plain; therefore, a file identified as application/json will be parsed by the TextAndCSVParser.

tika-parsers-standard-package

This package and these parsers are included in tika-app and tika-server-standard.

<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers-standard-package</artifactId>
    <version>2.8.0</version>
</dependency>


ParserMime TypeExtension
org.apache.tika.parser.apple.AppleSingleFileParserapplication/applefile
org.apache.tika.parser.apple.PListParserapplication/x-plist
org.apache.tika.parser.apple.PListParserapplication/x-bplist-itunes
org.apache.tika.parser.apple.PListParserapplication/x-bplist
org.apache.tika.parser.apple.PListParserapplication/x-bplist-memgraph
org.apache.tika.parser.apple.PListParserapplication/x-bplist-webarchive
org.apache.tika.parser.asm.ClassParserapplication/java-vm.class
org.apache.tika.parser.audio.AudioParseraudio/vnd.wave.wav
org.apache.tika.parser.audio.AudioParseraudio/x-wav.wav
org.apache.tika.parser.audio.AudioParseraudio/basic.au
org.apache.tika.parser.audio.AudioParseraudio/x-aiff.aif
org.apache.tika.parser.audio.MidiParserapplication/x-midi
org.apache.tika.parser.audio.MidiParseraudio/midi.mid
org.apache.tika.parser.code.SourceCodeParsertext/x-c++src.cpp
org.apache.tika.parser.code.SourceCodeParsertext/x-groovy.groovy
org.apache.tika.parser.code.SourceCodeParsertext/x-java-source.java
org.apache.tika.parser.crypto.Pkcs7Parserapplication/pkcs7-signature.p7s
org.apache.tika.parser.crypto.Pkcs7Parserapplication/pkcs7-mime.p7m
org.apache.tika.parser.crypto.TSDParserapplication/timestamped-data.tsd
org.apache.tika.parser.csv.TextAndCSVParsertext/csv.csv
org.apache.tika.parser.csv.TextAndCSVParsertext/tsv
org.apache.tika.parser.csv.TextAndCSVParsertext/plain.txt
org.apache.tika.parser.dbf.DBFParserapplication/x-dbf.dbf
org.apache.tika.parser.dgn.DGN8Parserimage/vnd.dgn; version=8
org.apache.tika.parser.dif.DIFParserapplication/dif+xml.dif
org.apache.tika.parser.dwg.DWGParserimage/vnd.dwg.dwg
org.apache.tika.parser.epub.EpubParserapplication/x-ibooks+zip.ibooks
org.apache.tika.parser.epub.EpubParserapplication/epub+zip.epub
org.apache.tika.parser.executable.ExecutableParserapplication/x-msdownload.dll
org.apache.tika.parser.executable.ExecutableParserapplication/x-sharedlib
org.apache.tika.parser.executable.ExecutableParserapplication/x-elf
org.apache.tika.parser.executable.ExecutableParserapplication/x-object
org.apache.tika.parser.executable.ExecutableParserapplication/x-executable
org.apache.tika.parser.executable.ExecutableParserapplication/x-coredump
org.apache.tika.parser.external.CompositeExternalParservideo/avi.avi
org.apache.tika.parser.external.CompositeExternalParservideo/mpeg.mpeg
org.apache.tika.parser.external.CompositeExternalParservideo/x-msvideo.avi
org.apache.tika.parser.external.CompositeExternalParservideo/mp4.mp4
org.apache.tika.parser.feed.FeedParserapplication/atom+xml.atom
org.apache.tika.parser.feed.FeedParserapplication/rss+xml.rss
org.apache.tika.parser.font.AdobeFontMetricParserapplication/x-font-adobe-metric.afm
org.apache.tika.parser.font.TrueTypeParserapplication/x-font-ttf.ttf
org.apache.tika.parser.html.HtmlParsertext/html.html
org.apache.tika.parser.html.HtmlParserapplication/vnd.wap.xhtml+xml
org.apache.tika.parser.html.HtmlParserapplication/x-asp
org.apache.tika.parser.html.HtmlParserapplication/xhtml+xml.xhtml
org.apache.tika.parser.http.HttpParserapplication/x-httpresponse
org.apache.tika.parser.hwp.HwpV5Parserapplication/x-hwp-v5
org.apache.tika.parser.image.BPGParserimage/bpg
org.apache.tika.parser.image.BPGParserimage/x-bpg.bpg
org.apache.tika.parser.image.HeifParserimage/heic-sequence
org.apache.tika.parser.image.HeifParserimage/heif.heif
org.apache.tika.parser.image.HeifParserimage/heic.heic
org.apache.tika.parser.image.HeifParserimage/heif-sequence
org.apache.tika.parser.image.ICNSParserimage/icns.icns
org.apache.tika.parser.image.ImageParserimage/png.png
org.apache.tika.parser.image.ImageParserimage/vnd.wap.wbmp.wbmp
org.apache.tika.parser.image.ImageParserimage/x-jbig2.jb2
org.apache.tika.parser.image.ImageParserimage/bmp.bmp
org.apache.tika.parser.image.ImageParserimage/x-xcf.xcf
org.apache.tika.parser.image.ImageParserimage/gif.gif
org.apache.tika.parser.image.ImageParserimage/x-icon.ico
org.apache.tika.parser.image.ImageParserimage/x-ms-bmp.bmp
org.apache.tika.parser.image.JXLParserimage/jxl.jxl
org.apache.tika.parser.image.JpegParserimage/jpeg.jpg
org.apache.tika.parser.image.PSDParserimage/vnd.adobe.photoshop.psd
org.apache.tika.parser.image.TiffParserimage/tiff.tiff
org.apache.tika.parser.image.WebPParserimage/webp.webp
org.apache.tika.parser.indesign.IDMLParserapplication/vnd.adobe.indesign-idml-package.idml
org.apache.tika.parser.iptc.IptcAnpaParsertext/vnd.iptc.anpa.anpa
org.apache.tika.parser.iwork.IWorkPackageParserapplication/vnd.apple.keynote.key
org.apache.tika.parser.iwork.IWorkPackageParserapplication/vnd.apple.iwork
org.apache.tika.parser.iwork.IWorkPackageParserapplication/vnd.apple.numbers.numbers
org.apache.tika.parser.iwork.IWorkPackageParserapplication/vnd.apple.pages.pages
org.apache.tika.parser.iwork.iwana.IWork13PackageParserapplication/vnd.apple.numbers.13
org.apache.tika.parser.iwork.iwana.IWork13PackageParserapplication/vnd.apple.unknown.13
org.apache.tika.parser.iwork.iwana.IWork13PackageParserapplication/vnd.apple.pages.13
org.apache.tika.parser.iwork.iwana.IWork13PackageParserapplication/vnd.apple.keynote.13
org.apache.tika.parser.iwork.iwana.IWork18PackageParserapplication/vnd.apple.pages.18
org.apache.tika.parser.iwork.iwana.IWork18PackageParserapplication/vnd.apple.keynote.18
org.apache.tika.parser.iwork.iwana.IWork18PackageParserapplication/vnd.apple.numbers.18
org.apache.tika.parser.mail.RFC822Parsermessage/rfc822.eml
org.apache.tika.parser.mat.MatParserapplication/x-matlab-data.mat
org.apache.tika.parser.mbox.MboxParserapplication/mbox.mbox
org.apache.tika.parser.microsoft.EMFParserimage/emf.emf
org.apache.tika.parser.microsoft.JackcessParserapplication/x-msaccess.mdb
org.apache.tika.parser.microsoft.MSOwnerFileParserapplication/x-ms-owner
org.apache.tika.parser.microsoft.OfficeParserapplication/x-tika-msoffice-embedded; format=ole10_native
org.apache.tika.parser.microsoft.OfficeParserapplication/msword.doc
org.apache.tika.parser.microsoft.OfficeParserapplication/vnd.visio.vsd
org.apache.tika.parser.microsoft.OfficeParserapplication/x-tika-ole-drm-encrypted
org.apache.tika.parser.microsoft.OfficeParserapplication/vnd.ms-project.mpp
org.apache.tika.parser.microsoft.OfficeParserapplication/x-tika-msworks-spreadsheet.xlr
org.apache.tika.parser.microsoft.OfficeParserapplication/x-mspublisher.pub
org.apache.tika.parser.microsoft.OfficeParserapplication/vnd.ms-powerpoint.ppt
org.apache.tika.parser.microsoft.OfficeParserapplication/x-tika-msoffice
org.apache.tika.parser.microsoft.OfficeParserapplication/sldworks.sldprt
org.apache.tika.parser.microsoft.OfficeParserapplication/x-tika-ooxml-protected
org.apache.tika.parser.microsoft.OfficeParserapplication/vnd.ms-excel.xls
org.apache.tika.parser.microsoft.OfficeParserapplication/vnd.ms-outlook.msg
org.apache.tika.parser.microsoft.OldExcelParserapplication/vnd.ms-excel.workspace.3
org.apache.tika.parser.microsoft.OldExcelParserapplication/vnd.ms-excel.workspace.4
org.apache.tika.parser.microsoft.OldExcelParserapplication/vnd.ms-excel.sheet.2
org.apache.tika.parser.microsoft.OldExcelParserapplication/vnd.ms-excel.sheet.3
org.apache.tika.parser.microsoft.OldExcelParserapplication/vnd.ms-excel.sheet.4
org.apache.tika.parser.microsoft.TNEFParserapplication/vnd.ms-tnef
org.apache.tika.parser.microsoft.TNEFParserapplication/x-tnef
org.apache.tika.parser.microsoft.TNEFParserapplication/ms-tnef
org.apache.tika.parser.microsoft.WMFParserimage/wmf.wmf
org.apache.tika.parser.microsoft.activemime.ActiveMimeParserapplication/x-activemime
org.apache.tika.parser.microsoft.chm.ChmParserapplication/vnd.ms-htmlhelp.chm
org.apache.tika.parser.microsoft.chm.ChmParserapplication/x-chm
org.apache.tika.parser.microsoft.chm.ChmParserapplication/chm
org.apache.tika.parser.microsoft.onenote.OneNoteParserapplication/onenote; format=one.one
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-powerpoint.template.macroenabled.12.potm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-excel.addin.macroenabled.12.xlam
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.wordprocessingml.template.dotx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-excel.sheet.binary.macroenabled.12.xlsb
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.wordprocessingml.document.docx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-powerpoint.slide.macroenabled.12.sldm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.drawing.vsdx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-powerpoint.slideshow.macroenabled.12.ppsm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-powerpoint.presentation.macroenabled.12.pptm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.presentationml.slide.sldx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-excel.sheet.macroenabled.12.xlsm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-word.template.macroenabled.12.dotm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-word.document.macroenabled.12.docm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-powerpoint.addin.macroenabled.12.ppam
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.spreadsheetml.template.xltx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-xpsdocument.xps
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.drawing.macroenabled.12.vsdm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.template.macroenabled.12.vstm
org.apache.tika.parser.microsoft.ooxml.OOXMLParsermodel/vnd.dwfx+xps.dwfx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.presentationml.template.potx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.presentationml.presentation.pptx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheet.xlsx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.stencil.vssx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.template.vstx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.openxmlformats-officedocument.presentationml.slideshow.ppsx
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-visio.stencil.macroenabled.12.vssm
org.apache.tika.parser.microsoft.ooxml.OOXMLParserapplication/vnd.ms-excel.template.macroenabled.12.xltm
org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParserapplication/vnd.ms-word2006ml
org.apache.tika.parser.microsoft.pst.OutlookPSTParserapplication/vnd.ms-outlook-pst.pst
org.apache.tika.parser.microsoft.rtf.RTFParserapplication/rtf.rtf
org.apache.tika.parser.microsoft.xml.SpreadsheetMLParserapplication/vnd.ms-spreadsheetml
org.apache.tika.parser.microsoft.xml.WordMLParserapplication/vnd.ms-wordml
org.apache.tika.parser.mif.MIFParserapplication/x-mif.mif
org.apache.tika.parser.mif.MIFParserapplication/vnd.mif.mif
org.apache.tika.parser.mif.MIFParserapplication/x-maker
org.apache.tika.parser.mp3.Mp3Parseraudio/mpeg.mpga
org.apache.tika.parser.mp4.MP4Parservideo/x-m4v.m4v
org.apache.tika.parser.mp4.MP4Parserapplication/mp4.mp4s
org.apache.tika.parser.mp4.MP4Parservideo/3gpp.3gp
org.apache.tika.parser.mp4.MP4Parservideo/3gpp2.3g2
org.apache.tika.parser.mp4.MP4Parservideo/quicktime.qt
org.apache.tika.parser.mp4.MP4Parseraudio/mp4.mp4a
org.apache.tika.parser.mp4.MP4Parservideo/mp4.mp4
org.apache.tika.parser.odf.FlatOpenDocumentParserapplication/vnd.oasis.opendocument.tika.flat.document
org.apache.tika.parser.odf.FlatOpenDocumentParserapplication/vnd.oasis.opendocument.flat.presentation.fodp
org.apache.tika.parser.odf.FlatOpenDocumentParserapplication/vnd.oasis.opendocument.flat.spreadsheet.fods
org.apache.tika.parser.odf.FlatOpenDocumentParserapplication/vnd.oasis.opendocument.flat.text.fodt
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.presentation.odp
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.chart.odc
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.text-web.oth
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.image.odi
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.graphics-template.otg
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.text-web.oth
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.spreadsheet-template.ots
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.spreadsheet-template.ots
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.sun.xml.writer.sxw
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.graphics-template.otg
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.graphics.odg
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.spreadsheet.ods
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.chart.odc
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.spreadsheet.ods
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.image.odi
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.text.odt
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.text-template.ott
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.formula-template.odft
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.formula.odf
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.image-template.oti
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.image-template.oti
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.presentation-template.otp
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.presentation-template.otp
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.text.odt
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.text-template.ott
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.chart-template.otc
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.chart-template.otc
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.formula-template.odft
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.text-master.otm
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.presentation.odp
org.apache.tika.parser.odf.OpenDocumentParserapplication/x-vnd.oasis.opendocument.graphics.odg
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.formula.odf
org.apache.tika.parser.odf.OpenDocumentParserapplication/vnd.oasis.opendocument.text-master.otm
org.apache.tika.parser.pdf.PDFParserapplication/pdf.pdf
org.apache.tika.parser.pkg.CompressorParserapplication/zlib
org.apache.tika.parser.pkg.CompressorParserapplication/x-gzip.gz
org.apache.tika.parser.pkg.CompressorParserapplication/x-bzip2.bz2
org.apache.tika.parser.pkg.CompressorParserapplication/x-compress.z
org.apache.tika.parser.pkg.CompressorParserapplication/x-java-pack200.pack
org.apache.tika.parser.pkg.CompressorParserapplication/x-lzma.lzma
org.apache.tika.parser.pkg.CompressorParserapplication/deflate64
org.apache.tika.parser.pkg.CompressorParserapplication/x-lz4.lz4
org.apache.tika.parser.pkg.CompressorParserapplication/x-snappy
org.apache.tika.parser.pkg.CompressorParserapplication/x-brotli.br
org.apache.tika.parser.pkg.CompressorParserapplication/gzip.gz
org.apache.tika.parser.pkg.CompressorParserapplication/x-bzip.bz
org.apache.tika.parser.pkg.CompressorParserapplication/x-xz.xz
org.apache.tika.parser.pkg.PackageParserapplication/x-tar.tar
org.apache.tika.parser.pkg.PackageParserapplication/java-archive.jar
org.apache.tika.parser.pkg.PackageParserapplication/x-arj.arj
org.apache.tika.parser.pkg.PackageParserapplication/x-archive.ar
org.apache.tika.parser.pkg.PackageParserapplication/zip.zip
org.apache.tika.parser.pkg.PackageParserapplication/x-cpio.cpio
org.apache.tika.parser.pkg.PackageParserapplication/x-tika-unix-dump
org.apache.tika.parser.pkg.PackageParserapplication/x-7z-compressed.7z
org.apache.tika.parser.pkg.RarParserapplication/x-rar-compressed.rar
org.apache.tika.parser.prt.PRTParserapplication/x-prt.prt
org.apache.tika.parser.sas.SAS7BDATParserapplication/x-sas-data.sd7
org.apache.tika.parser.tmx.TMXParserapplication/x-tmx.tmx
org.apache.tika.parser.video.FLVParservideo/x-flv.flv
org.apache.tika.parser.wacz.WACZParserapplication/x-wacz
org.apache.tika.parser.warc.WARCParserapplication/warc.warc
org.apache.tika.parser.wordperfect.QuattroProParserapplication/x-quattro-pro; version=9
org.apache.tika.parser.wordperfect.WordPerfectParserapplication/vnd.wordperfect; version=5.1
org.apache.tika.parser.wordperfect.WordPerfectParserapplication/vnd.wordperfect; version=5.0
org.apache.tika.parser.wordperfect.WordPerfectParserapplication/vnd.wordperfect; version=6.x
org.apache.tika.parser.xliff.XLIFF12Parserapplication/x-xliff+xml.xlf
org.apache.tika.parser.xliff.XLZParserapplication/x-xliff+zip.xlz
org.apache.tika.parser.xml.DcXMLParserapplication/xml.xml
org.apache.tika.parser.xml.DcXMLParserimage/svg+xml.svg
org.apache.tika.parser.xml.FictionBookParserapplication/x-fictionbook+xml.fb2
org.gagravarr.tika.FlacParseraudio/x-oggflac
org.gagravarr.tika.FlacParseraudio/x-flac.flac
org.gagravarr.tika.OggParseraudio/ogg.oga
org.gagravarr.tika.OggParserapplication/kate
org.gagravarr.tika.OggParserapplication/ogg.ogx
org.gagravarr.tika.OggParservideo/daala
org.gagravarr.tika.OggParservideo/x-ogguvs
org.gagravarr.tika.OggParservideo/x-ogm.ogm
org.gagravarr.tika.OggParseraudio/x-oggpcm
org.gagravarr.tika.OggParservideo/ogg.ogv
org.gagravarr.tika.OggParservideo/x-dirac.drc
org.gagravarr.tika.OggParservideo/x-oggrgb
org.gagravarr.tika.OggParservideo/x-oggyuv
org.gagravarr.tika.OpusParseraudio/opus.opus
org.gagravarr.tika.OpusParseraudio/ogg; codecs=opus
org.gagravarr.tika.SpeexParseraudio/ogg; codecs=speex
org.gagravarr.tika.SpeexParseraudio/speex.spx
org.gagravarr.tika.TheoraParservideo/theora
org.gagravarr.tika.VorbisParseraudio/vorbis.ogg


tika-parser-sqlite3-package

<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parser-sqlite3-package</artifactId>
    <version>2.8.0</version>
</dependency>


Parser | Mime Type | Extension
org.apache.tika.parser.sqlite3.SQLite3Parser | application/x-sqlite3 |