Java Code Examples for com.rometools.rome.feed.synd.SyndFeed#getEntries()

The following examples show how to use com.rometools.rome.feed.synd.SyndFeed#getEntries(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GoogleBaseParserTest.java    From rome with Apache License 2.0 6 votes vote down vote up
/**
 * Parses {@code xml/news2.xml} and checks the Google Base {@code Article} module attached to
 * the first entry of the feed.
 *
 * @throws Exception if the test file cannot be located, read or parsed
 */
public void testNews2Parse() throws Exception {
    LOG.debug("testNews2Parse");
    final SyndFeedInput input = new SyndFeedInput();
    // Reset to the epoch so that the millisecond field is zero for the date comparisons below.
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(0);
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/news2.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final Article module = (Article) entry.getModule(GoogleBase.URI);
    Assert.assertEquals("Image Link", "http://www.providers-website.com/image1.jpg", module.getImageLinks()[0].toString());
    // Calendar months are zero-based: 2 is March.
    cal.set(2007, 2, 20, 0, 0, 0);
    Assert.assertEquals("Expiration Date", cal.getTime(), module.getExpirationDate());
    this.assertEquals("Labels", new String[] { "news", "old" }, module.getLabels());
    Assert.assertEquals("Source", "Journal", module.getNewsSource());
    // Zero-based month again: 3 is April.
    cal.set(1961, 3, 12, 0, 0, 0);
    Assert.assertEquals("Pub Date", cal.getTime(), module.getPublishDate());
    this.assertEquals("Authors", new String[] { "James Smith" }, module.getAuthors());
    // Integer.valueOf replaces the deprecated boxing constructor; equality is unchanged.
    Assert.assertEquals("Pages", Integer.valueOf(1), module.getPages());
}
 
Example 2
Source File: GoogleBaseParserTest.java    From rome with Apache License 2.0 6 votes vote down vote up
/**
 * Parses {@code xml/wanted2.xml} and checks the Google Base {@code Wanted} module attached to
 * the first entry of the feed.
 *
 * @throws Exception if the test file cannot be located, read or parsed
 */
public void testWanted2Parse() throws Exception {
    // The log line previously named a different test (copy-paste slip).
    LOG.debug("testWanted2Parse");
    final SyndFeedInput input = new SyndFeedInput();
    // Reset to the epoch so that the millisecond field is zero for the date comparison below.
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(0);
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/wanted2.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final Wanted module = (Wanted) entry.getModule(GoogleBase.URI);
    Assert.assertEquals("Image Link", "http://www.providers-website.com/image1.jpg", module.getImageLinks()[0].toString());
    // Calendar months are zero-based: 11 is December.
    cal.set(2005, 11, 20, 0, 0, 0);
    Assert.assertEquals("Expiration Date", cal.getTime(), module.getExpirationDate());
    this.assertEquals("Labels", new String[] { "Wanted", "Truck" }, module.getLabels());
    Assert.assertEquals("Location", "123 Main Street, Anytown, CA, 12345, USA", module.getLocation());
}
 
Example 3
Source File: GoogleBaseParserTest.java    From rome with Apache License 2.0 6 votes vote down vote up
/**
 * Parses {@code xml/reviews2.xml} and checks the Google Base {@code Review} module attached to
 * the first entry of the feed.
 *
 * @throws Exception if the test file cannot be located, read or parsed
 */
public void testReview2Parse() throws Exception {
    LOG.debug("testReview2Parse");
    final SyndFeedInput input = new SyndFeedInput();
    // Reset to the epoch so that the millisecond field is zero for the date comparisons below.
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(0);
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/reviews2.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final Review module = (Review) entry.getModule(GoogleBase.URI);
    Assert.assertEquals("Image Link", "http://www.providers-website.com/image1.jpg", module.getImageLinks()[0].toString());
    // Calendar months are zero-based: 11 is December.
    cal.set(2005, 11, 20, 0, 0, 0);
    Assert.assertEquals("Expiration Date", cal.getTime(), module.getExpirationDate());
    this.assertEquals("Labels", new String[] { "Review", "Earth", "Google" }, module.getLabels());
    // Only year/month/day change here; the time-of-day fields keep the zeros set above.
    cal.set(2005, 2, 24);
    Assert.assertEquals("PubDate", cal.getTime(), module.getPublishDate());
    this.assertEquals("Authors", new String[] { "Jimmy Smith" }, module.getAuthors());
    Assert.assertEquals("Name of Item Rev", "Google Earth", module.getNameOfItemBeingReviewed());
    Assert.assertEquals("Type", "Product", module.getReviewType());
    Assert.assertEquals("Rever Type", "editorial", module.getReviewerType());
    // Float.valueOf replaces the deprecated boxing constructor; equality is unchanged.
    Assert.assertEquals("Rating", Float.valueOf(5f), module.getRating());
    Assert.assertEquals("URL of Item", new URL("http://earth.google.com/"), module.getUrlOfItemBeingReviewed());
}
 
Example 4
Source File: DeltaSyndFeedInfo.java    From rome with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a clone of the parent's feed that keeps only the entries whose URI is a key of
 * {@code changedMap}, i.e. the entries that were changed in the last setSyndFeed() call.
 *
 * @return the filtered clone of the feed
 * @throws RuntimeException wrapping the {@link CloneNotSupportedException} if the feed cannot
 *         be cloned
 */
@Override
public synchronized SyndFeed getSyndFeed() {
    final SyndFeed filtered;
    try {
        filtered = (SyndFeed) super.getSyndFeed().clone();
    } catch (final CloneNotSupportedException cloneFailure) {
        throw new RuntimeException(cloneFailure);
    }

    // Collect the survivors into a fresh list and swap it into the clone.
    final List<SyndEntry> survivors = new ArrayList<SyndEntry>();
    for (final SyndEntry candidate : filtered.getEntries()) {
        final boolean wasChanged = changedMap.containsKey(candidate.getUri());
        if (wasChanged) {
            survivors.add(candidate);
        }
    }
    filtered.setEntries(survivors);

    return filtered;
}
 
Example 5
Source File: RssFeedGeneratorTest.java    From archiva with Apache License 2.0 5 votes vote down vote up
// Generates a feed from three RssFeedEntry items sharing one timestamp and checks the feed
// header fields as well as the order and titles of the resulting SyndEntry items.
@SuppressWarnings("unchecked")
@Test
public void testNewFeed()
    throws Exception
{
    Date whenGathered = new Date( System.currentTimeMillis() );

    RssFeedEntry first = new RssFeedEntry( "Item 1" );
    first.setDescription( "RSS 2.0 feed item 1." );
    first.setPublishedDate( whenGathered );

    RssFeedEntry second = new RssFeedEntry( "Item 2" );
    second.setDescription( "RSS 2.0 feed item 2." );
    second.setPublishedDate( whenGathered );

    RssFeedEntry third = new RssFeedEntry( "Item 3" );
    third.setDescription( "RSS 2.0 feed item 3." );
    third.setPublishedDate( whenGathered );

    List<RssFeedEntry> entries = new ArrayList<>();
    entries.add( first );
    entries.add( second );
    entries.add( third );

    SyndFeed feed = generator.generateFeed( "Test Feed", "The test feed from Archiva.", entries );

    // feed-level fields
    assertEquals( "Test Feed", feed.getTitle() );
    assertEquals( "The test feed from Archiva.", feed.getDescription() );
    assertEquals( "en-us", feed.getLanguage() );
    assertEquals( entries.get( 2 ).getPublishedDate(), feed.getPublishedDate() );

    // entry-level fields, in insertion order
    List<SyndEntry> syndEntries = feed.getEntries();
    assertEquals( 3, syndEntries.size() );
    assertEquals( "Item 1", syndEntries.get( 0 ).getTitle() );
    assertEquals( "Item 2", syndEntries.get( 1 ).getTitle() );
    assertEquals( "Item 3", syndEntries.get( 2 ).getTitle() );
}
 
Example 6
Source File: GoogleBaseParserTest.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code xml/vehicles2.xml} and checks the Google Base {@code Vehicle} module attached
 * to the first entry of the feed.
 *
 * @throws Exception if the test file cannot be located, read or parsed
 */
public void testVehicle2Parse() throws Exception {
    LOG.debug("testVehicle2Parse");
    final SyndFeedInput input = new SyndFeedInput();
    // Reset to the epoch so that the millisecond field is zero for the date comparison below.
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(0);
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/vehicles2.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final Vehicle module = (Vehicle) entry.getModule(GoogleBase.URI);
    Assert.assertEquals("Image Link", "http://www.providers-website.com/image1.jpg", module.getImageLinks()[0].toString());
    // Calendar months are zero-based: 11 is December.
    cal.set(2005, 11, 20, 0, 0, 0);
    Assert.assertEquals("Expiration Date", cal.getTime(), module.getExpirationDate());
    this.assertEquals("Labels", new String[] { "car", "mini" }, module.getLabels());
    // A second cal.set(...) used to follow here, but the calendar was never read again.
    Assert.assertEquals("Currency", CurrencyEnumeration.USD, module.getCurrency());
    Assert.assertEquals("Price", 24000, module.getPrice().getValue(), 0);
    Assert.assertEquals("PriceType", PriceTypeEnumeration.STARTING, module.getPriceType());
    this.assertEquals("Payment Accepted", new PaymentTypeEnumeration[] { PaymentTypeEnumeration.CHECK, PaymentTypeEnumeration.VISA,
            PaymentTypeEnumeration.MASTERCARD }, module.getPaymentAccepted());
    Assert.assertEquals("Payment Notes", "financing available", module.getPaymentNotes());
    Assert.assertEquals("Vehicle Type", "car", module.getVehicleType());
    Assert.assertEquals("Make", "Mini", module.getMake());
    Assert.assertEquals("Model", "Cooper S", module.getModel());
    Assert.assertEquals("Year", "2006", module.getYear().toString());
    // Integer.valueOf replaces the deprecated boxing constructor; equality is unchanged.
    Assert.assertEquals("Mileage", Integer.valueOf(0), module.getMileage());
    this.assertEquals("Colors", new String[] { "red" }, module.getColors());
    Assert.assertEquals("Vin", "1M8GDM9AXKP042788", module.getVin());
    Assert.assertEquals("Location", "123 Main Street, Anytown, CA, 12345, USA", module.getLocation());
}
 
Example 7
Source File: TestSyndFeedAtom10b.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the link of every entry in the feed equals the expected resolved URI.
 */
public void testXmlBaseConformance() throws Exception {
    final SyndFeed feed = getSyndFeed(false);
    for (final SyndEntry current : feed.getEntries()) {
        final String failureMessage = "Incorrect URI: " + current.getLink() + " in entry [" + current.getTitle() + "]";
        assertEquals(failureMessage, "http://example.org/tests/base/result.html", current.getLink());
    }
}
 
Example 8
Source File: CategoryUpdater.java    From SimpleNews with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads and parses the feed at {@code mFeed.getXmlUrl()}, converts its items into
 * {@code Entry} objects and hands them to the database handler.
 *
 * Entries are kept when {@code deprecatedTime} is null or when their date is non-null and
 * greater than {@code deprecatedTime}. If the feed has no title yet, the parsed title is
 * stored on it and the feed is updated through the database handler.
 *
 * @return the kept entries, or {@code null} if reading or converting the feed threw
 */
@Override
public List<Entry> call() throws Exception {
    final List<Entry> kept = new ArrayList<>();
    try {
        final URL feedLocation = new URL(mFeed.getXmlUrl());
        final SyndFeed parsed = input.build(new XmlReader(feedLocation));
        final String feedTitle = parsed.getTitle();
        if (mFeed.getTitle() == null) {
            mFeed.setTitle(feedTitle);
            databaseHandler.updateFeed(mFeed);
        }
        for (SyndEntry rssItem : parsed.getEntries()) {
            final Entry converted = Utilities.getEntryFromRSSItem(rssItem, mFeed.getId(), feedTitle, category.getId());
            if (converted == null) {
                continue;
            }
            final boolean recentEnough = deprecatedTime == null
                    || (converted.getDate() != null && converted.getDate() > deprecatedTime);
            if (recentEnough) {
                kept.add(converted);
            }
        }
    } catch (Exception failure) {
        Log.e("CategoryUpdater", "XmlReader could not read feed", failure);
        return null;
    }

    // Swap the stored entries of this feed for the freshly parsed ones.
    databaseHandler.removeEntries(category.getId(), mFeed.getId(), null);
    databaseHandler.addEntries(category.getId(), mFeed.getId(), kept);
    getPartResult(kept);
    return kept;
}
 
Example 9
Source File: GoogleTest.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the Google Video RSS feed and prints the media module of every entry to standard
 * output.
 *
 * @throws Exception if the feed cannot be downloaded or parsed
 */
public static void testGoogleVideo() throws Exception {
    final SyndFeedInput input = new SyndFeedInput();
    final URL feedUrl = new URL("http://video.google.com/videofeed?type=top100new&num=20&output=rss");
    // Close the network stream once the feed is built; it was previously left open.
    // NOTE(review): the reader decodes with the platform default charset, as before.
    try (InputStreamReader reader = new InputStreamReader(feedUrl.openStream())) {
        final SyndFeed feed = input.build(reader);
        for (final SyndEntry entry : feed.getEntries()) {
            final MediaEntryModule m = (MediaEntryModule) entry.getModule(MediaModule.URI);
            System.out.print(m);
        }
    }
}
 
Example 10
Source File: FeedReader.java    From reactor-workshop with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Retrieves the feed body for the given URL, parses it into a DOM document and returns the
 * entries of the resulting feed.
 *
 * @param url location of the feed
 * @return the entries of the parsed feed
 * @throws IOException if the body cannot be retrieved or read
 * @throws FeedException if the document cannot be turned into a feed
 * @throws ParserConfigurationException if the XML parser cannot be configured
 * @throws SAXException if the body is not well-formed XML
 */
public List<SyndEntry> fetch(URL url) throws IOException, FeedException, ParserConfigurationException, SAXException {
    final String feedBody = get(url);
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // The body comes from a remote server: never resolve external entities (XXE hardening).
    // Internal entities and DOCTYPE declarations stay allowed so ordinary feeds still parse.
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    ByteArrayInputStream is = new ByteArrayInputStream(applyAtomNamespaceFix(feedBody).getBytes(UTF_8));
    Document doc = builder.parse(is);
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed = input.build(doc);
    return feed.getEntries();
}
 
Example 11
Source File: ConverterForRSS090.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code Channel} of the given type and fills it from the supplied feed: modules,
 * style sheet, encoding, title, link, description, image, items and foreign markup.
 *
 * The feed's own link is used when present; otherwise the href of its first entry in
 * {@code getLinks()} is used, if there is one.
 *
 * @param type feed type passed to the {@code Channel} constructor
 * @param syndFeed feed to read the values from
 * @return the populated channel
 */
protected WireFeed createRealFeed(final String type, final SyndFeed syndFeed) {
    final Channel rssChannel = new Channel(type);
    rssChannel.setModules(ModuleUtils.cloneModules(syndFeed.getModules()));
    rssChannel.setStyleSheet(syndFeed.getStyleSheet());
    rssChannel.setEncoding(syndFeed.getEncoding());
    rssChannel.setTitle(syndFeed.getTitle());

    final String primaryLink = syndFeed.getLink();
    final List<SyndLink> alternateLinks = syndFeed.getLinks();
    if (primaryLink == null) {
        if (!alternateLinks.isEmpty()) {
            rssChannel.setLink(alternateLinks.get(0).getHref());
        }
    } else {
        rssChannel.setLink(primaryLink);
    }

    rssChannel.setDescription(syndFeed.getDescription());

    final SyndImage feedImage = syndFeed.getImage();
    if (feedImage != null) {
        rssChannel.setImage(createRSSImage(feedImage));
    }

    final List<SyndEntry> feedEntries = syndFeed.getEntries();
    if (feedEntries != null) {
        rssChannel.setItems(createRSSItems(feedEntries));
    }

    // Foreign markup is only copied over when there is some.
    final List<Element> extraMarkup = syndFeed.getForeignMarkup();
    if (!extraMarkup.isEmpty()) {
        rssChannel.setForeignMarkup(extraMarkup);
    }

    return rssChannel;
}
 
Example 12
Source File: RssBot.java    From symphony-java-sample-bots with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the feed at {@code feedUrl} and sends one message per entry (title and link), for at
 * most {@code limit} entries.
 *
 * @throws MessagesException if a message cannot be sent
 * @throws IOException if the feed cannot be read
 * @throws FeedException if the feed cannot be parsed
 */
private void sendRssFeeds() throws MessagesException, IOException, FeedException {
    sendMessage("Fetching " + feedUrl);
    SyndFeedInput input = new SyndFeedInput();
    final List<SyndEntry> entries;
    // Close the reader once the feed is built; it was previously left open.
    try (XmlReader reader = new XmlReader(feedUrl)) {
        SyndFeed feed = input.build(reader);
        entries = feed.getEntries();
    }
    sendMessage("Found " + entries.size() + " items in the feed; printing the first " + limit);

    // Stop at the end of the list: a feed with fewer than `limit` entries used to throw
    // IndexOutOfBoundsException here.
    for (int i = 0; i < limit && i < entries.size(); i++) {
        SyndEntry entry = entries.get(i);
        sendMessage(entry.getTitle() + "-" + entry.getLink());
    }
}
 
Example 13
Source File: GroupAndSortTest.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Sorts the entries of {@code data/YahooTopSongs.xml} by the first sort field declared in the
 * feed's {@code SimpleListExtension} module and logs the title of each entry in sorted order.
 */
public void testSort2() throws Exception {

    final File source = new File(super.getTestFile("data/YahooTopSongs.xml"));
    final SyndFeed feed = new SyndFeedInput().build(source);
    final SimpleListExtension sle = (SimpleListExtension) feed.getModule(SimpleListExtension.URI);

    // Sort a copy so the feed's own entry list is left untouched.
    final List<Extendable> unsorted = new ArrayList<Extendable>(feed.getEntries());
    final List<Extendable> sorted = SleUtility.sort(unsorted, sle.getSortFields()[0], true);
    for (final Extendable item : sorted) {
        final SyndEntry asEntry = (SyndEntry) item;
        LOG.debug(asEntry.getTitle());
    }

}
 
Example 14
Source File: CsdbReleases.java    From petscii-bbs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Downloads the feed at the given URL and converts every entry into a {@code NewsFeed}.
 *
 * The trailing {@code " by ..."} part of each title is stripped before it is stored.
 *
 * @param urlString location of the feed
 * @return one {@code NewsFeed} per feed entry, in feed order
 * @throws IOException if the URL is malformed or the feed cannot be read
 * @throws FeedException if the feed cannot be parsed
 */
private static List<NewsFeed> getFeeds(String urlString) throws IOException, FeedException {
    URL url = new URL(urlString);
    SyndFeedInput input = new SyndFeedInput();
    List<SyndEntry> entries;
    // Close the reader once the feed is built; it was previously left open.
    try (XmlReader reader = new XmlReader(url)) {
        SyndFeed feed = input.build(reader);
        entries = feed.getEntries();
    }
    List<CsdbReleases.NewsFeed> result = new LinkedList<>();
    // NOTE(review): getTitle() and getDescription() are dereferenced unchecked, so an entry
    // without a title or description makes this throw - confirm the feed always has both.
    for (SyndEntry e : entries) {
        result.add(new CsdbReleases.NewsFeed(
                e.getPublishedDate(),
                e.getTitle().replaceAll("(?is) by .*?$", EMPTY),
                e.getDescription().getValue(),
                e.getUri()));
    }
    return result;
}
 
Example 15
Source File: GoogleBaseParserTest.java    From rome with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code xml/services2.xml} and checks the Google Base {@code Service} module attached
 * to the first entry of the feed.
 *
 * @throws Exception if the test file cannot be located, read or parsed
 */
public void testService2Parse() throws Exception {
    LOG.debug("testService2Parse");
    final SyndFeedInput input = new SyndFeedInput();
    // Reset to the epoch so that the millisecond field is zero for the date comparison below.
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(0);
    final SyndFeed feed = input.build(new File(super.getTestFile("xml/services2.xml")));
    final List<SyndEntry> entries = feed.getEntries();
    final SyndEntry entry = entries.get(0);
    final Service module = (Service) entry.getModule(GoogleBase.URI);
    Assert.assertEquals("Image Link", "http://www.providers-website.com/image1.jpg", module.getImageLinks()[0].toString());
    // Calendar months are zero-based: 11 is December.
    cal.set(2005, 11, 20, 0, 0, 0);
    Assert.assertEquals("Expiration Date", cal.getTime(), module.getExpirationDate());
    this.assertEquals("Labels", new String[] { "Food delivery" }, module.getLabels());
    // A second cal.set(...) used to follow here, but the calendar was never read again.
    Assert.assertEquals("Currency", CurrencyEnumeration.USD, module.getCurrency());
    Assert.assertEquals("Price", 15, module.getPrice().getValue(), 0);
    Assert.assertEquals("PriceType", PriceTypeEnumeration.STARTING, module.getPriceType());
    this.assertEquals("Payment Accepted", new PaymentTypeEnumeration[] { PaymentTypeEnumeration.VISA, PaymentTypeEnumeration.MASTERCARD },
            module.getPaymentAccepted());
    Assert.assertEquals("Payment Notes", "minimum payment on credit cards:45", module.getPaymentNotes());
    Assert.assertEquals("Service Type", "delivery", module.getServiceType());
    Assert.assertEquals("Location", "Anytown, CA, USA", module.getLocation());
    Assert.assertEquals("DeliveryRad", 20, module.getDeliveryRadius().getValue(), 0);
    Assert.assertEquals("Delivery Notes", "will deliver between 9am -5pm", module.getDeliveryNotes());
}
 
Example 16
Source File: CsdbReleasesSD2IEC.java    From petscii-bbs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Downloads the feed at the given URL and converts every entry into a {@code NewsFeed}.
 *
 * The trailing {@code " by ..."} part of each title is stripped before it is stored.
 *
 * @param urlString location of the feed
 * @return one {@code NewsFeed} per feed entry, in feed order
 * @throws IOException if the URL is malformed or the feed cannot be read
 * @throws FeedException if the feed cannot be parsed
 */
private static List<NewsFeed> getFeeds(String urlString) throws IOException, FeedException {
    URL url = new URL(urlString);
    SyndFeedInput input = new SyndFeedInput();
    List<SyndEntry> entries;
    // Close the reader once the feed is built; it was previously left open.
    try (XmlReader reader = new XmlReader(url)) {
        SyndFeed feed = input.build(reader);
        entries = feed.getEntries();
    }
    List<CsdbReleasesSD2IEC.NewsFeed> result = new LinkedList<>();
    // NOTE(review): getTitle() and getDescription() are dereferenced unchecked, so an entry
    // without a title or description makes this throw - confirm the feed always has both.
    for (SyndEntry e : entries) {
        result.add(new CsdbReleasesSD2IEC.NewsFeed(
                e.getPublishedDate(),
                e.getTitle().replaceAll("(?is) by .*?$", EMPTY),
                e.getDescription().getValue(),
                e.getUri()));
    }
    return result;
}
 
Example 17
Source File: NewVersionsOfArtifactRssFeedProcessorTest.java    From archiva with Apache License 2.0 4 votes vote down vote up
// Feeds three artifact versions gathered at two different times through the processor and
// checks that the resulting feed has two entries, one per gathering time.
@SuppressWarnings("unchecked")
@Test
public void testProcess()
    throws Exception
{
    // first gathering time: versions 1.0.1 and 1.0.2
    Date firstScanDate = new Date( 123456789 );
    ZonedDateTime firstScan = ZonedDateTime.ofInstant( firstScanDate.toInstant(), ZoneId.systemDefault() );
    ArtifactMetadata releaseOne = createArtifact( firstScan, "1.0.1" );
    ArtifactMetadata releaseTwo = createArtifact( firstScan, "1.0.2" );

    // second gathering time: version 1.0.3-SNAPSHOT
    Date secondScanDate = new Date( 345678912 );
    ZonedDateTime secondScan = ZonedDateTime.ofInstant( secondScanDate.toInstant(), ZoneId.systemDefault() );
    ArtifactMetadata snapshotThree = createArtifact( secondScan, "1.0.3-SNAPSHOT" );

    Map<String, String> reqParams = new HashMap<>();
    reqParams.put( RssFeedProcessor.KEY_GROUP_ID, GROUP_ID );
    reqParams.put( RssFeedProcessor.KEY_ARTIFACT_ID, ARTIFACT_ID );

    // record the repository expectations, then switch the mock to replay mode
    expect( metadataRepository.getProjectVersions( session, TEST_REPO, GROUP_ID, ARTIFACT_ID ) )
        .andReturn( Arrays.asList( "1.0.1", "1.0.2", "1.0.3-SNAPSHOT" ) );
    expect( metadataRepository.getArtifacts( session, TEST_REPO, GROUP_ID, ARTIFACT_ID, "1.0.1" ) )
        .andReturn( Collections.singletonList( releaseOne ) );
    expect( metadataRepository.getArtifacts( session, TEST_REPO, GROUP_ID, ARTIFACT_ID, "1.0.2" ) )
        .andReturn( Collections.singletonList( releaseTwo ) );
    expect( metadataRepository.getArtifacts( session, TEST_REPO, GROUP_ID, ARTIFACT_ID, "1.0.3-SNAPSHOT" ) )
        .andReturn( Collections.singletonList( snapshotThree ) );
    metadataRepositoryControl.replay();

    SyndFeed feed = newVersionsProcessor.process( reqParams );

    assertEquals( "New Versions of Artifact 'org.apache.archiva:artifact-two'", feed.getTitle() );
    assertEquals( "New versions of artifact 'org.apache.archiva:artifact-two' found during repository scan.",
                  feed.getDescription() );
    assertEquals( "en-us", feed.getLanguage() );
    assertEquals( secondScan.toInstant(),
                  ZonedDateTime.ofInstant( feed.getPublishedDate().toInstant(), ZoneId.systemDefault() ).toInstant() );

    List<SyndEntry> entries = feed.getEntries();
    assertEquals( 2, entries.size() );

    String expectedTitleStart = "New Versions of Artifact 'org.apache.archiva:artifact-two' as of ";

    SyndEntry earlier = entries.get( 0 );
    assertTrue( earlier.getTitle().contains( expectedTitleStart ) );
    assertEquals( firstScan.toInstant(), entries.get( 0 ).getPublishedDate().toInstant() );

    SyndEntry later = entries.get( 1 );
    assertTrue( later.getTitle().contains( expectedTitleStart ) );
    assertEquals( secondScan.toInstant(), entries.get( 1 ).getPublishedDate().toInstant() );

    metadataRepositoryControl.verify();
}
 
Example 18
Source File: FeedParser.java    From sakai with Educational Community License v2.0 4 votes vote down vote up
/**
 * Walks the entries of a feed and turns their enclosures into {@link Attachment} objects,
 * keyed by the uri of the entry they belong to.
 * <p>Entries with a blank uri are skipped. Every enclosure of an entry is converted and stored
 * under the same key, so when an entry carries several enclosures only the last one remains in
 * the map.
 *
 * @param feed the feed whose entries are inspected
 * @return map from entry uri to the attachment built from that entry's enclosure
 */
public static Map<String, Attachment> parseFeedEnclosures(SyndFeed feed) {
	
	// mime types that may be flagged as a displayable image
	List<String> renderableImageTypes = new ArrayList<String>();
	renderableImageTypes.add("image/jpeg");
	renderableImageTypes.add("image/gif");
	renderableImageTypes.add("image/png");
	renderableImageTypes.add("image/jpg");
	
	Map<String,Attachment> byEntryUri = new HashMap<String,Attachment>();
	
	for(SyndEntry item: feed.getEntries()) {
		
		//only entries with a usable uri can act as a map key
		if(StringUtils.isNotBlank(item.getUri())) {
			
			for(SyndEnclosure enclosure: item.getEnclosures()) {
				
				Attachment attachment = new Attachment();
				attachment.setUrl(enclosure.getUrl());
				attachment.setDisplayLength(formatLength(enclosure.getLength()));
				attachment.setType(enclosure.getType());
				
				//the display name is the part of the url after the last slash, or a default label
				String fileName = StringUtils.substringAfterLast(enclosure.getUrl(), "/");
				if(StringUtils.isBlank(fileName)){
					attachment.setDisplayName(Messages.getString("view.attachment.default"));
				} else {
					attachment.setDisplayName(fileName);
				}
				
				//flag enclosures whose type is one of the known image types
				boolean isRenderableImage = renderableImageTypes.contains(enclosure.getType());
				if(isRenderableImage){
					attachment.setImage(true);
				}
				
				//a later enclosure of the same entry replaces the earlier one
				byEntryUri.put(item.getUri(), attachment);
			}
		}
	}
	
	return byEntryUri;
}
 
Example 19
Source File: Issue131Test.java    From rome with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the category check on the feed itself, then on each of its entries in order.
 *
 * @param feed the feed to check
 */
private void checkFeed(final SyndFeed feed) {
    // feed-level categories first
    checkFeedCategories(feed.getCategories());

    // then the categories of every entry
    for (final SyndEntry current : feed.getEntries()) {
        checkEntryCategories(current.getCategories());
    }
}
 
Example 20
Source File: FeedParserBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the given bytes as a feed and returns one outlink per entry that has a usable target
 * and passes {@code filterOutlink}.
 *
 * The target of an entry is its link or, when that is blank, its uri. The entry's title,
 * published date and description are copied into the outlink metadata when present. When
 * {@code filterHoursSincePub} is not -1, entries published before "now minus that many hours"
 * are logged and dropped.
 *
 * @param url URL of the feed, passed to {@code filterOutlink} as the source
 * @param content raw bytes of the feed
 * @param parentMetadata metadata passed on to {@code filterOutlink}
 * @return the outlinks that were kept, in feed order
 * @throws Exception if the feed cannot be parsed or the URL is malformed
 */
private List<Outlink> parseFeed(String url, byte[] content,
        Metadata parentMetadata) throws Exception {
    SyndFeed parsedFeed = null;
    try (ByteArrayInputStream stream = new ByteArrayInputStream(content)) {
        parsedFeed = new SyndFeedInput().build(new InputSource(stream));
    }

    URL sourceUrl = new URL(url);
    List<Outlink> outlinks = new ArrayList<>();

    for (SyndEntry item : parsedFeed.getEntries()) {
        // some feeds carry a guid instead of a link, so fall back to the uri
        String target = item.getLink();
        if (StringUtils.isBlank(target)) {
            target = item.getUri();
        }
        if (StringUtils.isBlank(target)) {
            continue;
        }

        Outlink outlink = filterOutlink(sourceUrl, target, parentMetadata);
        if (outlink == null) {
            continue;
        }

        String entryTitle = item.getTitle();
        if (StringUtils.isNotBlank(entryTitle)) {
            outlink.getMetadata().setValue("feed.title", entryTitle.trim());
        }

        Date published = item.getPublishedDate();
        if (published != null) {
            boolean ageFilterEnabled = filterHoursSincePub != -1;
            if (ageFilterEnabled) {
                // cutoff = now minus the configured number of hours
                Calendar cutoff = Calendar.getInstance();
                cutoff.add(Calendar.HOUR, -filterHoursSincePub);
                if (published.before(cutoff.getTime())) {
                    LOG.info(
                            "{} has a published date {} which is more than {} hours old",
                            target, published.toString(),
                            filterHoursSincePub);
                    continue;
                }
            }
            outlink.getMetadata().setValue("feed.publishedDate",
                    published.toString());
        }

        SyndContent summary = item.getDescription();
        if (summary != null
                && StringUtils.isNotBlank(summary.getValue())) {
            outlink.getMetadata().setValue("feed.description",
                    summary.getValue());
        }

        outlinks.add(outlink);
    }

    return outlinks;
}