Lance,

I found that you can have a single config file that can have several
entities in it. My question now is: how can I add entities without restarting
the Solr service? It doesn't really work otherwise, but it looks like it
should because we call the /dataimport handler after the entire application
has been started and loaded. How can I make the app load the /dataimport
handler at runtime?

Example config...

<dataConfig>
  <!--
    DataImportHandler configuration that declares several RSS feed entities
    (cnn, ABC, CBS, whitehouse) in a single file. Every feed entity uses the
    same XPath field mapping; only the name and url differ.
  -->
  <!-- NOTE(review): HttpDataSource may be deprecated in favour of
       URLDataSource in this Solr release; confirm before switching. -->
  <dataSource type="HttpDataSource" />
  <document>
    <!--
      Outer entity: lists files under baseDir whose names match fileName.
      rootEntity="false" and dataSource="null" are set here; the nested feed
      entities below each name their own url.
      NOTE(review): the nested entities do not reference any value produced by
      this entity (e.g. a file path), so they presumably run once per matched
      file; verify that this is intended.
    -->
    <entity name="f"
            processor="FileListEntityProcessor"
            baseDir="C:/Users/aestrada//SolrNET/solr-1.4.1/lucidworks/solr/conf/dataimporthandler"
            fileName=".*xml"
            newerThan="'NOW-3DAYS'"
            recursive="true"
            rootEntity="false"
            dataSource="null">

      <!-- Feed entity: forEach matches both the channel node and each item
           node; channel-level fields are flagged commonField="true". -->
      <entity name="cnn"
              pk="link"
              url="http://rss.cnn.com/rss/cnn_topstories.rss"
              processor="XPathEntityProcessor"
              forEach="/rss/channel | /rss/channel/item"
              transformer="HTMLStripTransformer">

        <field column="source"       xpath="/rss/channel/title"
               commonField="true" />
        <field column="source-link"  xpath="/rss/channel/link"
               commonField="true" />
        <field column="subject"      xpath="/rss/channel/description"
               commonField="true" />
        <field column="title"        xpath="/rss/channel/item/title" />
        <field column="link"         xpath="/rss/channel/item/link" />
        <field column="description"  xpath="/rss/channel/item/description"
               stripHTML="true" />
        <field column="creator"      xpath="/rss/channel/item/creator" />
        <field column="item-subject" xpath="/rss/channel/item/subject" />
        <field column="author"       xpath="/rss/channel/item/author" />
        <field column="comments"     xpath="/rss/channel/item/comments" />
        <field column="pubdate"      xpath="/rss/channel/item/pubDate" />
      </entity>

      <!-- Same field mapping as the cnn entity, different feed url. -->
      <entity name="ABC"
              pk="link"
              url="http://feeds.abcnews.com/abcnews/topstories"
              processor="XPathEntityProcessor"
              forEach="/rss/channel | /rss/channel/item"
              transformer="HTMLStripTransformer">

        <field column="source"       xpath="/rss/channel/title"
               commonField="true" />
        <field column="source-link"  xpath="/rss/channel/link"
               commonField="true" />
        <field column="subject"      xpath="/rss/channel/description"
               commonField="true" />
        <field column="title"        xpath="/rss/channel/item/title" />
        <field column="link"         xpath="/rss/channel/item/link" />
        <field column="description"  xpath="/rss/channel/item/description"
               stripHTML="true" />
        <field column="creator"      xpath="/rss/channel/item/creator" />
        <field column="item-subject" xpath="/rss/channel/item/subject" />
        <field column="author"       xpath="/rss/channel/item/author" />
        <field column="comments"     xpath="/rss/channel/item/comments" />
        <field column="pubdate"      xpath="/rss/channel/item/pubDate" />
      </entity>

      <!-- Same field mapping as the cnn entity, different feed url. -->
      <entity name="CBS"
              pk="link"
              url="http://feeds.cbsnews.com/CBSNewsMain?format=xml"
              processor="XPathEntityProcessor"
              forEach="/rss/channel | /rss/channel/item"
              transformer="HTMLStripTransformer">

        <field column="source"       xpath="/rss/channel/title"
               commonField="true" />
        <field column="source-link"  xpath="/rss/channel/link"
               commonField="true" />
        <field column="subject"      xpath="/rss/channel/description"
               commonField="true" />
        <field column="title"        xpath="/rss/channel/item/title" />
        <field column="link"         xpath="/rss/channel/item/link" />
        <field column="description"  xpath="/rss/channel/item/description"
               stripHTML="true" />
        <field column="creator"      xpath="/rss/channel/item/creator" />
        <field column="item-subject" xpath="/rss/channel/item/subject" />
        <field column="author"       xpath="/rss/channel/item/author" />
        <field column="comments"     xpath="/rss/channel/item/comments" />
        <field column="pubdate"      xpath="/rss/channel/item/pubDate" />
      </entity>

      <!-- Same field mapping as the cnn entity, different feed url. -->
      <entity name="whitehouse"
              pk="link"
              url="http://www.whitehouse.gov/feed/blog/white-house"
              processor="XPathEntityProcessor"
              forEach="/rss/channel | /rss/channel/item"
              transformer="HTMLStripTransformer">

        <field column="source"       xpath="/rss/channel/title"
               commonField="true" />
        <field column="source-link"  xpath="/rss/channel/link"
               commonField="true" />
        <field column="subject"      xpath="/rss/channel/description"
               commonField="true" />
        <field column="title"        xpath="/rss/channel/item/title" />
        <field column="link"         xpath="/rss/channel/item/link" />
        <field column="description"  xpath="/rss/channel/item/description"
               stripHTML="true" />
        <field column="creator"      xpath="/rss/channel/item/creator" />
        <field column="item-subject" xpath="/rss/channel/item/subject" />
        <field column="author"       xpath="/rss/channel/item/author" />
        <field column="comments"     xpath="/rss/channel/item/comments" />
        <field column="pubdate"      xpath="/rss/channel/item/pubDate" />
      </entity>
    </entity>
  </document>
</dataConfig>


On Fri, Dec 10, 2010 at 10:38 PM, Lance Norskog <goks...@gmail.com> wrote:

> There is I believe no way to do this without separate copies of your
> script. Each 'handler=/dataimport' has to refer to a separate config
> file.
>
> You can make several copies and name them config1.xml, config2.xml
> etc. You'll have to call each one manually, so you have to manage your
> own thread pool.
>
> On Fri, Dec 10, 2010 at 8:15 AM, Adam Estrada
> <estrada.adam.gro...@gmail.com> wrote:
> > All,
> >
> > Right now I am using the default DIH config that comes with the Solr
> > examples. I update my index using the dataimport handler here
> >
> > http://localhost:8983/solr/admin/dataimport.jsp?handler=/dataimport
> >
> > This works fine but I want to be able to index more than just one feed at
> > a time and more importantly I want to be able to index both ATOM and RSS
> > feeds, which means that the schema will definitely be different.
> >
> > There is a good example on how to index all of the example docs in the
> > SolrNet example application but that is looking for xml files with the
> > properly formatted xml tags.
> >
> >                foreach (var file in
> > Directory.GetFiles(Server.MapPath("/exampledocs"), "*.xml"))
> >                {
> >                    connection.Post("/update", File.ReadAllText(file,
> > Encoding.UTF8));
> >                }
> >                solr.Commit();
> >
> > example xml:
> >
> > - <add>
> >  - <doc>
> >   <field name="*id*">F8V7067-APL-KIT</field>
> >   <field name="*name*">Belkin Mobile Power Cord for iPod w/ Dock</field>
> >   <field name="*manu*">Belkin</field>
> >   <field name="*cat*">electronics</field>
> >   <field name="*cat*">connector</field>
> >   <field name="*features*">car power adapter, white</field>
> >   <field name="*weight*">4</field>
> >   <field name="*price*">19.95</field>
> >   <field name="*popularity*">1</field>
> >   <field name="*inStock*">false</field>
> >   <field name="*manufacturedate_dt*">2005-08-01T16:30:25Z</field>
> >  </doc>
> > </add>
> >
> > This obviously won't help me when trying to grab random RSS feeds so my
> > question is, how can I ingest several feeds at a time? Can I do this
> > programmatically or is there a configuration option I am missing?
> >
> > Thanks,
> > Adam
> >
>
>
>
> --
> Lance Norskog
> goks...@gmail.com
>

Reply via email to