from datetime import datetime, timedelta
from dateutil.parser import parse
import feedparser
import logging
from peewee import IntegrityError
import pytz

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2 fallback

import app_config
from util.models import Story

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

class RSSScraper:
    def __init__(self, source):
        self.source = source

        # We'll ignore stories older than this.
        self.magic_date_cutoff = datetime.now(pytz.timezone(app_config.PROJECT_TIMEZONE)) - timedelta(days=5)

    def scrape_and_load(self):
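        """
        Scrape the feed and write any new stories to the database.
        """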
        raw_stories = self.scrape()
        stories = self.write(stories=raw_stories, team=self.source['team'])
        return stories

    """
    Scrape an RSS feed
    """
    def scrape(self):
        feed = feedparser.parse(self.source['url'])
        stories = []
        for entry in feed.entries:
            title = entry.title
            date = parse(entry.published)
            link = entry.link

            # Use the URL path as the slug rather than entry.id.
            slug = urlparse(link).path
            slug = slug.replace('//', '')  # Temp hack for bad carebot blog urls

            # Skip stories older than the cutoff. This assumes the feed's
            # published dates carry timezone info, since the cutoff is
            # timezone-aware.
            if date > self.magic_date_cutoff:
                stories.append({
                    'name': title,
                    'slug': slug,
                    'url': link,
                    'date': date
                })

        return stories

    def write(self, stories, team=None):
        """
        Write scraped stories to the database and return the newly
        created records.
        """
        # TODO: this logic is duplicated in spreadsheet.py and should be
        # abstracted into a shared helper.
        new_stories = []
        for story in stories:
            try:
                record = Story.create(
                    name=story['name'],
                    slug=story['slug'],
                    date=story['date'],
                    url=story['url'],
                    team=team
                )
                new_stories.append(record)
            except IntegrityError:
                # The story probably already exists in the database.
                logger.info('Not adding %s to database: probably already exists', story['name'])

        return new_stories
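
# A minimal usage sketch, assuming a source dict with 'url' and 'team' keys
# (the shape this class reads). The feed URL and team value are hypothetical.
if __name__ == '__main__':
    source = {
        'url': 'http://example.com/rss.xml',  # hypothetical feed URL
        'team': 'example-team',               # hypothetical team identifier
    }
    scraper = RSSScraper(source)
    for story in scraper.scrape_and_load():
        logger.info('Added story: %s', story.name)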