0down votefavorite <http://stackoverflow.com/questions/34962280/solr-indexing-pdf-attachments-not-working-in-ubuntu#>
I have a problem integrating Solr on an Ubuntu server. Before using Solr on the Ubuntu server, I tested it on my Mac, where it worked perfectly with both the DIH request handler and update/extract: it indexed my PDF, DOC, and DOCX documents. After installing Solr on the Ubuntu server with the same configuration files and libraries, I found that Solr does not index PDF documents, and there are no errors or exceptions in the Solr log. However, I can still search over .doc and .docx documents. Here are some parts of my solrconfig.xml: <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> <requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler" > <lst name="defaults"> <str name="lowernames">true</str> <str name="fmap.meta">ignored_</str> <str name="fmap.content">_text_</str> </lst> </requestHandler> DIH config: <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> <lst name="defaults"> <str name="config">tika.config.xml</str> </lst> </requestHandler> tika.config.xml: <dataConfig> <dataSource type="BinFileDataSource" /> <document> <entity name="files" processor="FileListEntityProcessor" dataSource="null" rootEntity="false" baseDir="D:\Lucene\document" fileName=".*\.(DOC)|(PDF)|(pdf)|(doc)|(docx)|(ppt)" onError="skip" recursive="true"> <field column="fileAbsolutePath" name="id" /> <field column="fileSize" name="size" /> <field column="fileLastModified" name="lastModified" /> <field column="file" name="fileName" /> <entity name="documentImport" dataSource="files" processor="TikaEntityProcessor" url="${files.fileAbsolutePath}" format="text"> <field column="Author" name="author" meta="true"/> <field column="title" name="title" meta="true"/> <field column="text" name="text"/> <field column="text" name="content"/> <field column="LastModifiedBy" name="LastModifiedBy" meta="true"/> </entity> </entity> 
</document> </dataConfig>