#! /usr/bin/python
# -*- coding: utf-8 -*-

"""
Copyright (C) 2016 Borja Menendez Moreno

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.

Authors: Borja Menéndez Moreno <info20up@gmail.com>

This is the API for the 20up backup program. This API allows a client to
retrieve information about his specific account.
"""

import re
import unicodedata
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# DOM ids / class names used to locate elements on the social network's pages.
INFOS = {
    'email': 'email',
    'password': 'input_password',
    'submit': 'submit_button',
    'albums': 'pst-pht-dropwdown',
    'classAlbums': 'sel-block',
    'ulFirstPic': 'albumBody',
    'albumDisplay': 'album-display',
    'idPhotoImage': 'photo_image',
    'viewMore': 'overlay_wall_view_more',
    'picComments': 'wallpost-list-overlay-comments',
    'picDate': 'h-date',
    'username': 'cmt-username',
    'comment': 'wall-delete',
    'next': 'photo_nav_next',
    'next2': 'pht-nav-next',  # NOTE(review): currently unused; kept for reference
    'next3': 'i-nav-next',
}

TWENTY_HOST = 'https://www.tuenti.com/'

URLS = {
    'login': TWENTY_HOST,
    'home': TWENTY_HOST + '#m=Home&func=view_social_home_canvas',
    'my_profile': TWENTY_HOST + '#m=Profile&func=index',
    'my_privates': TWENTY_HOST + '#m=Messages&func=index',
}


def normalize(text):
    """Return *text* NFKD-normalized with every character outside
    [a-zA-Z0-9, newline, dot] replaced by a hyphen (filesystem-safe name).
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return re.sub(r'[^a-zA-Z0-9\n.]', '-', decomposed)


class API:
    """The API class.

    This class is used for getting information. It only parses the
    information given by the web browser.
    """

    def __init__(self, browser):
        """Open the chosen web browser and load the login page.

        Args:
            browser: 'ch' for Chrome, 'fi' for Firefox.

        Raises:
            RuntimeError: if the chosen browser is unsupported, or if the
                supported browser fails to open.
        """
        self.driver = None
        # Validate the browser choice BEFORE the try-block: the original code
        # raised the "unsupported" error inside the try, where the generic
        # except swallowed it and reported the wrong message.
        if browser not in ('ch', 'fi'):
            raise RuntimeError(
                'El navegador web elegido no esta soportado por 20up')
        try:
            if browser == 'ch':
                self.driver = webdriver.Chrome()
            else:
                self.driver = webdriver.Firefox()
        except Exception:
            raise RuntimeError(
                'Imposible abrir el navegador web; por favor, elige otro')
        self.driver.get(URLS['login'])

    def waitForLogin(self):
        """Wait for the user to be logged into the social network.

        Polls for the sidebar element that only exists once logged in.
        """
        logged = False
        while not logged:
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located(
                        (By.ID, 'sideBarPlaceHolder')))
                logged = True
            except Exception:
                # Not logged in yet; keep polling.
                pass

    def getAllAlbums(self):
        """Get all the albums for the given user.

        An album is a shortlist of three elements (name, number, link),
        where <name> is the name of the album itself, <number> is the
        number of pictures in the album and <link> is the url of the album.

        Returns:
            A list of albums, or None if the profile page could not be
            reached (e.g. the user is not logged in).
        """
        self.driver.get(URLS['my_profile'])
        trigger = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, 'albumSelector')))
        if self.driver.current_url != URLS['my_profile']:
            return None
        trigger.click()

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        allAlbums = soup.find(
            'ul', {'class': INFOS['albums']}).find_all(
            'a', {'class': INFOS['classAlbums']})

        myAlbums = []
        for album in allAlbums:
            words = album.getText().split()
            # All words but the last form the album name; the last word is
            # the picture count wrapped in parentheses, e.g. "(1.234)".
            name = normalize(' '.join(words[:-1]).replace(' ', '-'))
            count = words[-1][1:-1]
            # Strip thousands separators; handling both '.' and ',' at once
            # also survives a count that mixes them (the original split-based
            # parsing would crash on int()).
            number = int(count.replace('.', '').replace(',', ''))
            myAlbums.append([name, number, TWENTY_HOST + album['href']])
        return myAlbums

    def getFirstPicture(self, linkAlbum, oldFirstPictureLink=''):
        """Get the first picture from a given album.

        Args:
            linkAlbum: the <link> of the album to be downloaded.
            oldFirstPictureLink: the <link> of the first picture of the
                previous album.

        Returns:
            A <link> to the picture.
        """
        self.driver.get(linkAlbum)
        picture = oldFirstPictureLink
        try:
            # Poll until the album grid shows a first picture different from
            # the previous album's (the page updates asynchronously).
            while picture == oldFirstPictureLink:
                WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located(
                        (By.ID, INFOS['ulFirstPic'])))
                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                try:
                    anchor = soup.find(
                        'div', {'class': INFOS['albumDisplay']}).find(
                        'ul', {'id': INFOS['ulFirstPic']}).find_all(
                        'li')[0].find('a')
                    picture = TWENTY_HOST + anchor['href']
                except Exception:
                    # Grid not rendered yet; retry after the pause below.
                    pass
                sleep(0.2)
            self.driver.get(picture)
        except Exception:
            # Timed out waiting for the album body; return what we have.
            pass
        return picture

    def getPicture(self, oldSrc, comments=False):
        """Get the picture to be downloaded and its comments, if requested.

        Args:
            oldSrc: the <src> of the previously processed image; used to
                detect when the viewer has advanced to a new picture.
            comments: indicates whether to obtain comments of the picture
                or not.

        Returns:
            A shortlist of four elements of (link, title, date, comments),
            where <link> is the image to be downloaded, <title> is the title
            of the picture, <date> is the submission's date and <comments>
            is a list of comments (empty unless *comments* is True).
        """
        newSrc = oldSrc
        # Block until the viewer shows an image different from the last one.
        while newSrc == oldSrc:
            newSrc = WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, INFOS['idPhotoImage']))).get_attribute('src')
            sleep(0.1)

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        pic = soup.find('img', {'id': INFOS['idPhotoImage']})
        date = soup.find('em', {'class': INFOS['picDate']}).getText()
        # Make the date usable as part of a file name.
        date = date.replace(':', '-').replace(',', '').replace(' ', '-')
        allComments = self.getComments() if comments else []
        return [pic['src'], pic['alt'], date, allComments]

    def getComments(self):
        """Get the comments of the current picture.

        Returns:
            A list of comments of the picture, where a <comment> is a
            shortlist with the comment itself (<user date: comment>).
        """
        # Keep clicking "view more" until no such button remains.
        while True:
            try:
                more = WebDriverWait(self.driver, 1).until(
                    EC.presence_of_element_located(
                        (By.ID, INFOS['viewMore'])))
                more.click()
            except Exception:
                break

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        myComments = []
        try:
            allComments = soup.find(
                'ol', {'id': INFOS['picComments']}).find_all('li')
            for comment in allComments:
                try:
                    # The trailing delete link (" Eliminar") marks the end of
                    # the comment text; turn it into a separator.
                    myComments.append(
                        comment.get_text(separator=' ').replace(
                            ' Eliminar', ':'))
                except Exception:
                    pass
        except Exception:
            # No comment list present on this picture.
            pass
        return myComments

    def getNextPicture(self):
        """Get the next picture to the given picture.

        Tries the overlay "next" arrow first; falls back to the inline
        navigation arrow when the overlay one is not clickable.
        """
        try:
            # 'arrow' instead of 'next': don't shadow the builtin next().
            arrow = WebDriverWait(self.driver, 10).until(
                EC.element_to_be_clickable((By.ID, INFOS['next'])))
            try:
                # Focusing the arrow first makes the click more reliable.
                self.driver.execute_script(
                    "document.getElementById('" + INFOS['next'] +
                    "').focus();")
            except Exception:
                pass
            finally:
                arrow.click()
        except Exception:
            try:
                arrow = WebDriverWait(self.driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.CLASS_NAME, INFOS['next3'])))
                arrow.click()
            except Exception:
                pass

    def goToPrivates(self):
        """Open the private messages' page."""
        self.driver.get(URLS['my_privates'])

    def loadMoreComments(self, discards):
        """Load more comments of the wall.

        Args:
            discards: the number of comments to be discarded.

        Returns:
            The text of the wall comments, skipping the first *discards*
            entries.

        Raises:
            RuntimeError: if the profile page cannot be reached (the user is
                not logged into the social network).
        """
        self.driver.get(URLS['my_profile'])
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, 'albumSelector')))
        if self.driver.current_url != URLS['my_profile']:
            raise RuntimeError('Es necesario entrar primero a la red social')

        # Scroll to the bottom so the lazily loaded wall items are fetched.
        self.driver.execute_script(
            'window.scrollTo(0, document.body.scrollHeight);')
        try:
            # Click the first "Ver ..." link to expand collapsed comments.
            # NOTE(review): find_elements_by_xpath is the selenium 3 API;
            # kept for compatibility with the selenium version this targets.
            xpath = "//*[contains(@class, 'act-L')]"
            for trigger in self.driver.find_elements_by_xpath(xpath):
                if 'Ver' in trigger.text:
                    trigger.click()
                    sleep(0.5)
                    break
        except Exception:
            pass

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        listComments = soup.find(
            'ol', {'id': 'wallpost-list'}).find_all('li', {'class': 'item'})
        # Slice instead of the original manual discard counter.
        return [comment.get_text(separator=' ')
                for comment in listComments[discards:]]