#!/usr/bin/env python3 # -*- coding: utf-8 -*- from urllib.request import urlopen, Request from urllib.parse import quote from textwrap import wrap from lyrics import CACHE_PATH import os import re url = 'https://www.google.com/search?q=' HEADER = {'User-Agent': 'Mozilla/5.0'} CLASS_NAME = r'\w{5,7} \w{4,5} \w{5,7}' # dependent on User-Agent # location for cache def query(track_name): return quote(track_name + ' lyrics') def get_html(url, header=HEADER): try: req = Request(url, data=None, headers=header) req_url = urlopen(req) except Exception as e: return 'Cannot connect to internet!', e if req_url.code != 200: print('invalid request') exit(1) return req_url.read().decode('utf-8') def get_az_html(url): html = get_html(url.replace('lyrics', 'azlyrics')) if isinstance(html, tuple): return html regex = re.compile(r'(http[s]?://www.azlyrics.com/lyrics(?:.*?))&') az_url = regex.search(html) if az_url == None: return 'No Lyrics Found!' else: header = {'User-Agent': 'Mozilla/5.0 Firefox/70.0'} az_url = az_url.group(1) az_html = get_html(az_url, header) return az_html def get_azlyrics(url): az_html = get_az_html(url) if isinstance(az_html, tuple): return az_html[0] az_regex = re.compile(r'<!-- Usage of azlyrics.com content by any third-party lyrics provider is prohibited by our licensing agreement. Sorry about that. -->(.*)<!-- MxM banner -->', re.S) ly = az_regex.search(az_html) if ly == None: # Az lyrics not found return 'Azlyrics missing...' rep = {'"': '\"', '&': '&', '\r' : ''} ly = re.sub(r'<[/]?\w*?>', '', ly.group(1)).strip() # ly = ly.replace('"', '\"').replace('&', '&') # regex = re.compile('|'.join(substrings)) ly = re.sub('|'.join(rep.keys()), lambda match: rep[match.group(0)], ly) lyrics_lines = ly.split('\n') return lyrics_lines def fetch_lyrics(url): html = get_html(url) if isinstance(html, tuple): return html[0] html_regex = re.compile(r'<div class="{}">([^>]*?)</div>'.format(CLASS_NAME), re.S) text_list = html_regex.findall(html) if len(text_list) < 2: # No google result found! lyrics_lines = get_azlyrics(url) else: ly = [] for l in text_list[1:]: # lyrics must be multiline, # ignore the artist info below lyrics if l.count('\n') > 2: ly += l.split('\n') if len(ly) < 5: # too short match for lyrics lyrics_lines = get_azlyrics(url) else: # format lyrics lyrics_lines = ly return lyrics_lines def get_lyrics(track_name, source, cache=True): filename = track_name.strip() filename = re.sub(r'\s|\/|\\|\.', '', filename) filepath = os.path.join(CACHE_PATH, filename) if not os.path.isdir(CACHE_PATH): os.makedirs(CACHE_PATH) if os.path.isfile(filepath) and cache: # lyrics exist with open(filepath) as file: lyrics_lines = file.read().splitlines() else: if source == 'google': lyrics_lines = fetch_lyrics(url + query(track_name)) else: lyrics_lines = get_azlyrics(url + query(track_name)) if isinstance(lyrics_lines, str): return ['lyrics not found! :(', 'Issue is:', lyrics_lines] text = map(lambda x : x.replace('&', '&') + '\n', lyrics_lines) with open(filepath, 'w') as file: file.writelines(text) return lyrics_lines def delete_lyrics(track_name): filename = track_name.strip() filename = re.sub(r'\s|\/|\\|\.', '', filename) filepath = os.path.join(CACHE_PATH, filename) if os.path.isfile(filepath): # lyrics exist os.remove(filepath) return True return False def align(lines, width, alignment=1): if alignment == 1: return lines elif alignment == 0: return [line.center(width) for line in lines] else: return [line.rjust(width - 1) for line in lines] def wrap_text(text, width): lines = [] for line in text: if len(line) > width: line = wrap(line, width=width) if isinstance(line, list): lines += line else: lines.append(line) return lines