Repository: camel Updated Branches: refs/heads/camel-2.13.x acb11019f -> 8a67c0297 refs/heads/camel-2.14.x 63bb0cfb5 -> a051b1d23
CAMEL-7784: camel-rss should match feeds on same timestamp but having multiple feeds. Using an LRU cache to keep max 1000 entries for duplicate detection to avoid the map growing forever under certain conditions. Thanks to Andy Flatt for the patch. Project: http://git-wip-us.apache.org/repos/asf/camel/repo Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/8a67c029 Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/8a67c029 Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/8a67c029 Branch: refs/heads/camel-2.13.x Commit: 8a67c0297d4ce6d70529b58d41cea58802730893 Parents: acb1101 Author: Claus Ibsen <davscl...@apache.org> Authored: Sun Nov 30 15:57:46 2014 +0100 Committer: Claus Ibsen <davscl...@apache.org> Committed: Sun Nov 30 15:58:35 2014 +0100 ---------------------------------------------------------------------- .../camel/component/rss/UpdatedDateFilter.java | 33 +++++++---- .../component/rss/UpdatedDateFilterTest.java | 59 ++++++++++++++++++++ 2 files changed, 81 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/camel/blob/8a67c029/components/camel-rss/src/main/java/org/apache/camel/component/rss/UpdatedDateFilter.java ---------------------------------------------------------------------- diff --git a/components/camel-rss/src/main/java/org/apache/camel/component/rss/UpdatedDateFilter.java b/components/camel-rss/src/main/java/org/apache/camel/component/rss/UpdatedDateFilter.java index 73456d6..4f1b2b7 100644 --- a/components/camel-rss/src/main/java/org/apache/camel/component/rss/UpdatedDateFilter.java +++ b/components/camel-rss/src/main/java/org/apache/camel/component/rss/UpdatedDateFilter.java @@ -17,47 +17,58 @@ package org.apache.camel.component.rss; import java.util.Date; +import java.util.Map; import com.sun.syndication.feed.synd.SyndEntry; - import org.apache.camel.component.feed.EntryFilter; import org.apache.camel.component.feed.FeedEndpoint; 
+import org.apache.camel.util.LRUCache; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Filters out all entries which occur before the last time of the entry we saw (assuming - * entries arrive sorted in order). - * - * @version + * Filters out all entries which occur before the last time of the entry we saw + * (assuming entries arrive sorted in order). */ public class UpdatedDateFilter implements EntryFilter { private static final Logger LOG = LoggerFactory.getLogger(UpdatedDateFilter.class); private Date lastUpdate; + // use a LRU so we only keep the last 1000 elements to avoid growing to large + private Map<Integer, Integer> entriesForLastUpdate = new LRUCache<Integer, Integer>(1000); public UpdatedDateFilter(Date lastUpdate) { this.lastUpdate = lastUpdate; } - public boolean isValidEntry(FeedEndpoint endpoint, Object feed, Object entry) { - Date updated = ((SyndEntry)entry).getUpdatedDate(); + public boolean isValidEntry(FeedEndpoint endpoint, Object feed, Object entry) { + Date updated = ((SyndEntry) entry).getUpdatedDate(); if (updated == null) { // never been updated so get published date - updated = ((SyndEntry)entry).getPublishedDate(); + updated = ((SyndEntry) entry).getPublishedDate(); } if (updated == null) { LOG.debug("No updated time for entry so assuming its valid: entry=[{}]", entry); return true; } if (lastUpdate != null) { - if (lastUpdate.after(updated) || lastUpdate.equals(updated)) { + if (lastUpdate.after(updated)) { LOG.debug("Entry is older than lastupdate=[{}], no valid entry=[{}]", lastUpdate, entry); return false; + } else { + Integer hash = entry.hashCode(); + if (lastUpdate.equals(updated)) { + if (entriesForLastUpdate.containsKey(hash)) { + LOG.debug("Already processed entry=[{}]", entry); + return false; + } + } else { + entriesForLastUpdate.clear(); + } + entriesForLastUpdate.put(hash, hash); } } - lastUpdate = updated; + lastUpdate = updated; return true; } - } 
http://git-wip-us.apache.org/repos/asf/camel/blob/8a67c029/components/camel-rss/src/test/java/org/apache/camel/component/rss/UpdatedDateFilterTest.java ---------------------------------------------------------------------- diff --git a/components/camel-rss/src/test/java/org/apache/camel/component/rss/UpdatedDateFilterTest.java b/components/camel-rss/src/test/java/org/apache/camel/component/rss/UpdatedDateFilterTest.java new file mode 100644 index 0000000..653e174 --- /dev/null +++ b/components/camel-rss/src/test/java/org/apache/camel/component/rss/UpdatedDateFilterTest.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.component.rss; + +import java.util.Date; + +import com.sun.syndication.feed.synd.SyndEntry; +import com.sun.syndication.feed.synd.SyndEntryImpl; +import com.sun.syndication.feed.synd.SyndFeedImpl; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class UpdatedDateFilterTest { + + private UpdatedDateFilter fixture; + private Date now; + + @Before + public void setup() { + now = new Date(); + fixture = new UpdatedDateFilter(now); + } + + @Test + public void testFilter() { + SyndEntry entry = new SyndEntryImpl(); + entry.setPublishedDate(now); + entry.setAuthor("ANDY"); + assertTrue(fixture.isValidEntry(new RssEndpoint(), new SyndFeedImpl(), entry)); + + entry = new SyndEntryImpl(); + entry.setPublishedDate(now); + entry.setAuthor("ANDY"); + assertFalse(fixture.isValidEntry(new RssEndpoint(), new SyndFeedImpl(), entry)); + + entry = new SyndEntryImpl(); + entry.setPublishedDate(now); + entry.setAuthor("FRED"); + assertTrue(fixture.isValidEntry(new RssEndpoint(), new SyndFeedImpl(), entry)); + } + +}