import re import os import functools from itertools import chain from datetime import date, datetime, timedelta import yaml from flask import current_app, Markup from veripress import cache from veripress.model.models import Page, Post, Widget from veripress.model.parsers import get_standard_format_name, get_parser from veripress.helpers import to_list, to_datetime, Pair, traverse_directory class Storage(object): def __init__(self): """Initialization.""" self._closed = False def close(self): """ Close the storage. Subclasses should override this to close any file descriptor or database connection if necessary. """ self._closed = True @property def closed(self): """ Read-only property. This state should be changed only in 'close' method. """ return self._closed @cache.memoize(timeout=1 * 60) def fix_relative_url(self, publish_type, rel_url): """ Fix post or page relative url to a standard, uniform format. :param publish_type: publish type ('post' or 'page') :param rel_url: relative url to fix :return: tuple(fixed relative url or file path if exists else None, file exists or not) :raise ValueError: unknown publish type """ if publish_type == 'post': return self.fix_post_relative_url(rel_url), False elif publish_type == 'page': return self.fix_page_relative_url(rel_url) else: raise ValueError( 'Publish type "{}" is not supported'.format(publish_type)) @staticmethod @cache.memoize(timeout=2 * 60 * 60) # actually, it will never change def fix_post_relative_url(rel_url): """ Fix post relative url to a standard, uniform format. Possible input: - 2016/7/8/my-post - 2016/07/08/my-post.html - 2016/8/09/my-post/ - 2016/8/09/my-post/index - 2016/8/09/my-post/index.htm - 2016/8/09/my-post/index.html :param rel_url: relative url to fix :return: fixed relative url, or None if cannot recognize """ m = re.match( r'^(?P<year>\d{4})/(?P<month>\d{1,2})/(?P<day>\d{1,2})/' r'(?P<post_name>[^/]+?)' r'(?:(?:\.html)|(?:/(?P<index>index(?:\.html?)?)?))?$', rel_url ) if not m: return None year, month, day, post_name = m.groups()[:4] try: d = date(year=int(year), month=int(month), day=int(day)) return '/'.join((d.strftime('%Y/%m/%d'), post_name, 'index.html' if m.group('index') else '')) except (TypeError, ValueError): # the date is invalid return None @staticmethod def fix_page_relative_url(rel_url): """ Fix page relative url to a standard, uniform format. Possible input: - my-page - my-page/ - my-page/index - my-page/index.htm - my-page/index.html - my-page/specific.file NOTE! Because custom page has a very strong connection with the storage type chosen, this method should be implemented in subclasses. :param rel_url: relative url to fix :return: tuple(fixed relative url or file path if exists else None, file exists or not) """ raise NotImplementedError def get_posts(self, include_draft=False, filter_functions=None): """ Get all posts, returns an iterable of Post object. """ raise NotImplementedError def get_post(self, rel_url, include_draft=False): """ Get post for given relative url, returns a Post object. """ raise NotImplementedError def get_tags(self): """ Get all tags as a list of dict_item(tag_name, Pair(count_all, count_published)). """ raise NotImplementedError def get_categories(self): """ Get all categories as a list of dict_item(category_name, Pair(count_all, count_published)). """ raise NotImplementedError def get_pages(self, include_draft=False): """ Get all custom pages, returns an iterable of Page object. """ raise NotImplementedError def get_page(self, rel_url, include_draft=False): """ Get custom page for given relative url, returns a Page object. """ raise NotImplementedError def get_widgets(self, position=None, include_draft=False): """ Get all widgets, returns an iterable of Widget object. """ raise NotImplementedError @staticmethod def _filter_result(result, filter_functions=None): """ Filter result with given filter functions. :param result: an iterable object :param filter_functions: some filter functions :return: a filter object (filtered result) """ if filter_functions is not None: for filter_func in filter_functions: result = filter(filter_func, result) return result def get_posts_with_limits(self, include_draft=False, **limits): """ Get all posts and filter them as needed. :param include_draft: return draft posts or not :param limits: other limits to the attrs of the result, should be a dict with string or list values :return: an iterable of Post objects """ filter_funcs = [] for attr in ('title', 'layout', 'author', 'email', 'tags', 'categories'): if limits.get(attr): filter_set = set(to_list(limits.get(attr))) def get_filter_func(filter_set_, attr_): return lambda p: filter_set_.intersection( to_list(getattr(p, attr_))) filter_funcs.append(get_filter_func(filter_set, attr)) for attr in ('created', 'updated'): interval = limits.get(attr) if isinstance(interval, (list, tuple)) and len(interval) == 2 \ and isinstance(interval[0], date) and isinstance( interval[1], date): # [start date(time), end date(time)] start, end = interval start = to_datetime(start) if not isinstance(end, datetime): # 'end' is a date, # we should convert it to 00:00:00 of the next day, # so that posts of that day will be included end = datetime.strptime( '%04d-%02d-%02d' % (end.year, end.month, end.day), '%Y-%m-%d') end += timedelta(days=1) def get_filter_func(attr_, start_dt, end_dt): return lambda p: start_dt <= getattr(p, attr_) < end_dt filter_funcs.append(get_filter_func(attr, start, end)) return self.get_posts(include_draft=include_draft, filter_functions=filter_funcs) def search_for(self, query, include_draft=False): """ Search for a query text. :param query: keyword to query :param include_draft: return draft posts/pages or not :return: an iterable object of posts and pages (if allowed). """ query = query.lower() if not query: return [] def contains_query_keyword(post_or_page): contains = query in post_or_page.title.lower() \ or query in Markup( get_parser(post_or_page.format).parse_whole( post_or_page.raw_content) ).striptags().lower() return contains return filter(contains_query_keyword, chain(self.get_posts(include_draft=include_draft), self.get_pages(include_draft=include_draft) if current_app.config[ 'ALLOW_SEARCH_PAGES'] else [])) class FileStorage(Storage): @staticmethod @cache.memoize(timeout=1 * 60) def fix_page_relative_url(rel_url): """ Fix page relative url to a standard, uniform format. Possible input: - my-page - my-page/ - my-page/index - my-page/index.htm - my-page/index.html - my-page/specific.file :param rel_url: relative url to fix :return: tuple(fixed relative url or FILE PATH if exists else None, file exists or not) """ rel_url = rel_url.lstrip('/') # trim all heading '/' endswith_slash = rel_url.endswith('/') rel_url = rel_url.rstrip('/') + ( '/' if endswith_slash else '') # preserve only one trailing '/' if not rel_url or rel_url == '/': return None, False file_path = os.path.join(current_app.instance_path, 'pages', rel_url.replace('/', os.path.sep)) if rel_url.endswith('/'): index_html_file_path = os.path.join(file_path, 'index.html') if os.path.isfile(index_html_file_path): # index.html exists return index_html_file_path, True return rel_url, False elif os.path.isfile(file_path): ext = os.path.splitext(file_path)[1][1:] if get_standard_format_name(ext) is not None: # is source of custom page if current_app.config['PAGE_SOURCE_ACCESSIBLE']: return file_path, True else: # is other direct files return file_path, True elif os.path.isdir(file_path): return rel_url + '/', False sp = rel_url.rsplit('/', 1) m = re.match(r'(.+)\.html?', sp[-1]) if m: sp[-1] = m.group(1) + '.html' else: sp[-1] += '.html' return '/'.join(sp), False @staticmethod @cache.memoize(timeout=1 * 60) def search_file(search_root, search_filename, instance_relative_root=False): """ Search for a filename in a specific search root dir. :param search_root: root dir to search :param search_filename: filename to search (no extension) :param instance_relative_root: search root is relative to instance path :return: tuple(full_file_path, extension without heading dot) """ if instance_relative_root: search_root = os.path.join(current_app.instance_path, search_root) file_path = None file_ext = None for file in os.listdir(search_root): filename, ext = os.path.splitext(file) if filename == search_filename and ext and ext != '.': file_path = os.path.join(search_root, filename + ext) file_ext = ext[1:] # remove heading '.' (dot) break return file_path, file_ext # noinspection PyUnresolvedReferences search_instance_file = staticmethod( functools.partial(search_file.__func__, instance_relative_root=True)) @staticmethod @cache.memoize(timeout=1 * 60) def read_file(file_path): """ Read yaml head and raw body content from a file. :param file_path: file path :return: tuple(meta, raw_content) """ with open(file_path, 'r', encoding='utf-8') as f: whole = f.read().strip() if whole.startswith('---'): # may has yaml meta info, so we try to split it out sp = re.split(r'-{3,}', whole.lstrip('-'), maxsplit=1) if len(sp) == 2: # do have yaml meta info, so we read it return yaml.load(sp[0]), sp[1].lstrip() return {}, whole @cache.memoize(timeout=2 * 60) def get_posts(self, include_draft=False, filter_functions=None): """ Get all posts from filesystem. :param include_draft: return draft posts or not :param filter_functions: filter to apply BEFORE result being sorted :return: an iterable of Post objects (the first is the latest post) """ def posts_generator(path): """Loads valid posts one by one in the given path.""" if os.path.isdir(path): for file in os.listdir(path): filename, ext = os.path.splitext(file) format_name = get_standard_format_name(ext[1:]) if format_name is not None and re.match( r'\d{4}-\d{2}-\d{2}-.+', filename): # the format is supported and the filename is valid, # so load this post post = Post() post.format = format_name post.meta, post.raw_content = FileStorage.read_file( os.path.join(path, file)) post.rel_url = filename.replace('-', '/', 3) + '/' post.unique_key = '/post/' + post.rel_url yield post posts_path = os.path.join(current_app.instance_path, 'posts') result = filter(lambda p: include_draft or not p.is_draft, posts_generator(posts_path)) result = self._filter_result(result, filter_functions) return sorted(result, key=lambda p: p.created, reverse=True) @cache.memoize(timeout=2 * 60) def get_post(self, rel_url, include_draft=False): """ Get post for given relative url from filesystem. Possible input: - 2017/01/01/my-post/ - 2017/01/01/my-post/index.html :param rel_url: relative url :param include_draft: return draft post or not :return: a Post object """ raw_rel_url = str(rel_url) if rel_url.endswith('/index.html'): rel_url = rel_url.rsplit('/', 1)[ 0] + '/' # remove the trailing 'index.html' post_filename = rel_url[:-1].replace('/', '-') post_file_path, post_file_ext = FileStorage.search_instance_file( 'posts', post_filename) if post_file_path is None or post_file_ext is None or \ get_standard_format_name(post_file_ext) is None: # no such post return None # construct the post object post = Post() post.rel_url = raw_rel_url # 'rel_url' contains no trailing 'index.html' post.unique_key = '/post/' + rel_url post.format = get_standard_format_name(post_file_ext) post.meta, post.raw_content = FileStorage.read_file(post_file_path) return post if include_draft or not post.is_draft else None @cache.memoize(timeout=5 * 60) def get_tags(self): """ Get all tags and post count of each tag. :return: dict_item(tag_name, Pair(count_all, count_published)) """ posts = self.get_posts(include_draft=True) result = {} for post in posts: for tag_name in set(post.tags): result[tag_name] = result.setdefault( tag_name, Pair(0, 0)) + Pair(1, 0 if post.is_draft else 1) return list(result.items()) @cache.memoize(timeout=5 * 60) def get_categories(self): """ Get all categories and post count of each category. :return dict_item(category_name, Pair(count_all, count_published)) """ posts = self.get_posts(include_draft=True) result = {} for post in posts: for category_name in set(post.categories): result[category_name] = result.setdefault( category_name, Pair(0, 0)) + Pair(1, 0 if post.is_draft else 1) return list(result.items()) @cache.memoize(timeout=2 * 60) def get_pages(self, include_draft=False): """ Get all custom pages (supported formats, excluding other files like '.js', '.css', '.html'). :param include_draft: return draft page or not :return: an iterable of Page objects """ def pages_generator(pages_root_path): for file_path in traverse_directory(pages_root_path, yield_dir=False): rel_path = os.path.relpath(file_path, pages_root_path) rel_path, ext = os.path.splitext(rel_path) if not ext or ext == '.' or get_standard_format_name( ext[1:]) is None: continue # pragma: no cover if rel_path.endswith(os.path.sep + 'index'): rel_path = rel_path[:-len('index')] else: rel_path += '.html' page = self.get_page(rel_path.replace(os.path.sep, '/'), include_draft=include_draft) if page is not None: yield page pages_path = os.path.join(current_app.instance_path, 'pages') return list(pages_generator(pages_path)) @cache.memoize(timeout=2 * 60) def get_page(self, rel_url, include_draft=False): """ Get custom page for given relative url from filesystem. Possible input: - my-page/ - my-page/index.html - my-another-page.html - a/b/c/ - a/b/c/d.html :param rel_url: relative url :param include_draft: return draft page or not :return: a Page object """ page_dir = os.path.dirname(rel_url.replace('/', os.path.sep)) page_path = os.path.join(current_app.instance_path, 'pages', page_dir) if not os.path.isdir(page_path): # no such directory return None page_filename = rel_url[len(page_dir):].lstrip('/') if not page_filename: page_filename = 'index' else: page_filename = os.path.splitext(page_filename)[0] page_file_path, page_file_ext = FileStorage.search_file(page_path, page_filename) if page_file_path is None or page_file_ext is None or \ get_standard_format_name(page_file_ext) is None: # no such page return None page = Page() page.rel_url = rel_url page.unique_key = '/' + ( rel_url.rsplit('/', 1)[0] + '/' if rel_url.endswith( '/index.html') else rel_url) page.format = get_standard_format_name(page_file_ext) page.meta, page.raw_content = FileStorage.read_file(page_file_path) return page if include_draft or not page.is_draft else None @cache.memoize(timeout=2 * 60) def get_widgets(self, position=None, include_draft=False): """ Get widgets for given position from filesystem. :param position: position or position list :param include_draft: return draft widgets or not :return: an iterable of Widget objects """ def widgets_generator(path): """Loads valid widgets one by one in the given path.""" if os.path.isdir(path): for file in os.listdir(path): _, ext = os.path.splitext(file) format_name = get_standard_format_name(ext[1:]) if format_name is not None: # the format is supported, so load it widget = Widget() widget.format = format_name widget.meta, widget.raw_content = \ FileStorage.read_file(os.path.join(path, file)) yield widget widgets_path = os.path.join(current_app.instance_path, 'widgets') positions = to_list(position) if position is not None else position result = filter( lambda w: (w.position in positions if positions is not None else True) and (include_draft or not w.is_draft), widgets_generator(widgets_path)) return sorted(result, key=lambda w: (w.position, w.order))