"""
A simple RSS feed sucker.

Usage:

   app = wsgiFeedSuck(RSS_URI, prefix)

where 'prefix' is the prefix to remove from the permalinks, e.g.

   app = wsgiFeedSuck("http://www.advogato.org/person/titus/rss.xml",
                      "http://www.advogato.org/person/titus/diary.html?start=")

will publish diary entries by number, so that

   ./59

points to entry '59' of my diary.
"""

import html
import shelve
import sys
import time

import feedparser                       # Mark Pilgrim's feedparser

# Default path of the 'shelve' database in which fetched feeds are cached.
# NOTE(review): shelve stores pickles, and this default lives in a shared
# temporary directory -- confirm that is acceptable for your deployment.
DEFAULT_CACHE_FILE = '/tmp/feed-suck.cache'

# Minimum age (in seconds) a cached feed must reach before it is re-fetched.
FEEDCHECK_INTERVAL = 60 * 60

#
# format_entry
#

def format_entry(feed, entry):
    """
    Render one feed entry as an HTML fragment.

    'feed' is the parsed feed; its 'feed.link' and 'feed.title' are used
    for the "from" line.  'entry' is one of the feed's entries; its
    'link', 'title' and 'summary' are used.

    Returns the fragment as a string.

    Both links are HTML-escaped before being placed inside the
    single-quoted href attributes, so a quote or '&' in a URL cannot break
    out of the attribute.  The titles and the summary are inserted
    unchanged.
    NOTE(review): presumably feedparser hands these over as already
    sanitized HTML -- confirm before escaping them as well.
    """
    s = """
<h2><A href='%s'>%s</a></h2>
<i>from <a href='%s'>%s</a></i>
<p>
%s
""" % (html.escape(entry.link, quote=True),
       entry.title,
       html.escape(feed.feed.link, quote=True),
       feed.feed.title,
       entry.summary,)

    return s

class wsgiFeedSuck:
    """
    RSS feed displayer.

    A WSGI application publishing the entries of a single feed: an empty
    path lists all entries, any other path is looked up as the entry whose
    permalink equals 'prefix' + path (without the leading '/').
    """

    def __init__(self, feed, prefix, cache_name=DEFAULT_CACHE_FILE):
        """
        'feed' is the URI of the feed to publish, 'prefix' is the leading
        part of the entry permalinks to strip, and 'cache_name' is the
        file handed to 'shelve' for caching the parsed feed.
        """
        self.feed_uri = feed
        self.prefix = prefix
        self.cache_name = cache_name

    def _get_feed(self):
        """
        The only actually complicated function in the class ;).

        This function checks to see if we have a cached object,
        and (if so) checks to see if the cached object is old enough to
        check again.  If so, it sucks it down, using both etag &
        last_modified (when the cached feed has them).

        Returns the parsed feed, or None when nothing is cached and the
        fetch produced no feed.  A None result is never written to the
        cache, so the next request tries again.
        """
        feed_uri = self.feed_uri

        # first, load in the cache object (if any).  The shelf is closed
        # in the 'finally' below on every exit path, including the early
        # return for a still-fresh cache entry.
        cache = shelve.open(self.cache_name)
        try:
            last_cache_obj = cache.get(feed_uri)
            last_feed = None

            # if we have a cache obj, see if FEEDCHECK_INTERVAL has
            # passed.
            if last_cache_obj:
                last_feed = last_cache_obj.parsed_feed
                time_diff = time.time() - last_cache_obj.last_checked

                # a cached None is the leftover of a failed fetch: retry
                # instead of serving it for the rest of the interval.
                if last_feed is not None and time_diff < FEEDCHECK_INTERVAL:
                    sys.stderr.write(
                        'only %d seconds have passed, not sucking\n'
                        % (time_diff,))
                    return last_feed

            # proceed: try to grab a new feed.

            # set the etag and modified variables; a feed fetched from a
            # server that sent neither simply has no such attributes.
            etag = modified = None
            if last_feed:
                etag = getattr(last_feed, 'etag', None)
                modified = getattr(last_feed, 'modified', None)

            # query for a new feed.
            sys.stderr.write('sucking feed %s\n' % (feed_uri,))
            feed = feedparser.parse(feed_uri, etag=etag, modified=modified)

            # if nothing, reset to last feed.
            if not feed.feed:
                sys.stderr.write('no new feed.\n')
                feed = last_feed
            else:
                sys.stderr.write('got new feed\n')

            # shelve the new or updated cache_obj -- but only if there is
            # a feed to remember.
            if feed is not None:
                cache[feed_uri] = CachedFeed(feed_uri, feed, time.time())

            return feed
        finally:
            cache.close()

    def _transform_link(self, link):
        """
        Remove the prefix, if present.
        """
        prefix = self.prefix
        if link.startswith(prefix):
            link = link[len(prefix):]

        return link

    def _respond(self, start_response, status, body):
        """
        Start an HTML response with the given status and return its body.

        Text bodies are encoded as UTF-8 (and the charset is declared in
        the Content-type header) because WSGI response bodies must be
        byte strings; bodies that already are bytes are passed through.
        """
        if not isinstance(body, bytes):
            body = body.encode('utf-8')

        start_response(status,
                       [('Content-type', 'text/html; charset=utf-8'),])
        return _EntryIterator(body)

    def __call__(self, environ, start_response):
        """
        Serve either the directory or a specific entry.

        Responds with 503 when no feed could be obtained and with 404
        when the requested path matches no entry.
        """
        feed = self._get_feed()
        if feed is None:
            return self._respond(start_response,
                                 "503 Service Unavailable",
                                 "feed is currently unavailable")

        # 'pathinfo' defines the entry they're looking for.
        pathinfo = environ.get('PATH_INFO', '')

        # no path given? return index.
        if not pathinfo.strip('/'):
            return self.serve_directory(feed, start_response)

        # transform this entry & look for the appropriate link.
        entry_link = self.prefix + pathinfo[1:]
        for i in feed.entries:
            if i.link == entry_link:    # found it! quit.
                return self._respond(start_response, "200 OK",
                                     format_entry(feed, i))

        # the path comes straight from the request: escape it before
        # echoing it back into the page.
        return self._respond(start_response, "404 Not Found",
                             "no such entry <tt>%s</tt>"
                             % (html.escape(pathinfo),))

    def serve_directory(self, feed, start_response):
        """
        Serve a simple listing of all of the feed entries, with
        transformed links.

        Link URLs are HTML-escaped for use inside the href attributes;
        titles are inserted unchanged.
        NOTE(review): presumably feedparser already sanitizes titles --
        confirm before escaping them as well.
        """
        parts = ["<h2><a href='%s'>%s</a></h2>"
                 % (html.escape(feed.feed.link, quote=True),
                    feed.feed.title,)]
        for i in feed.entries:
            href = html.escape(self._transform_link(i.link), quote=True)
            parts.append("<a href='%s'>%s</a><br>" % (href, i.title))

        return self._respond(start_response, "200 OK", "".join(parts))

class _EntryIterator:
    def __init__(self, data):
        self.data = data

    def __iter__(self):
        yield self.data

class CachedFeed:
    """
    Record stored in the cache for one feed: the feed's URI, the most
    recently obtained parsed feed, and the time.time() value at which the
    feed was last checked.
    """
    def __init__(self, uri, parsed_feed, last_checked):
        # plain attributes only: they are read directly by the code that
        # loads this record back from the cache.
        self.uri, self.parsed_feed, self.last_checked = (
            uri, parsed_feed, last_checked)

