# -*- coding: utf-8 -*-

from crcUtils import signedCrc32
import LikedSavedDatabase
import imageSaver
import imgurpython as imgur
import logger
import os
import re
import settings
import sys
import utilities
import urllib

if sys.version_info[0] >= 3:
    from urllib.request import urlretrieve, urlopen
    #from urllib.request import urlopen
else:
    from urllib import urlretrieve, urlopen


def isImgurIndirectUrl(url):
    """Return whether url is an imgur link that is neither a file nor an album.

    True when the URL mentions imgur, imageSaver.getFileTypeFromUrl() reports
    no file type for it, and it contains no '/a/' album segment.
    """
    return ('imgur' in url.lower()
            and not imageSaver.getFileTypeFromUrl(url)
            and '/a/' not in url)


def imgurIdFromUrl(url):
    """Extract the imgur image/album ID (the last path segment) from url.

    The query string, fragment and trailing slashes are ignored, so
    'https://imgur.com/a/abc/?x=1#0' yields 'abc'.

    Returns None when url is not an imgur URL or has no ID segment.
    """
    # Query strings and fragments are not part of the ID
    path = re.split(r"[?#]", url, maxsplit=1)[0].rstrip('/')
    # The host is matched case-insensitively; the captured ID keeps its case
    idMatch = re.search(r"imgur\.com.*/([^/]+)$", path, re.IGNORECASE)
    if not idMatch:
        return None
    return idMatch.group(1)


def convertImgurIndirectUrlToImg(submission, imgurAuth, url):
    """Resolve an indirect imgur page URL to the direct link of its image.

    submission is only used to record failures via
    LikedSavedDatabase.db.addUnsupportedSubmission(); imgurAuth supplies
    clientId and clientSecret.

    Returns the image link, or None when API credits are exhausted, no image
    ID can be parsed from url, or the API request raises.
    """
    # Login to imgur
    # This is required since they made NSFW images require login
    imgurClient = imgur.ImgurClient(imgurAuth.clientId, imgurAuth.clientSecret)

    if not checkImgurAPICredits(imgurClient):
        return None

    imageId = imgurIdFromUrl(url)
    if not imageId:
        # Bail out here instead of asking the API for image 'None'
        errorMessage = "Failed to convert {} to image id".format(url)
        logger.log(errorMessage)
        LikedSavedDatabase.db.addUnsupportedSubmission(submission, errorMessage)
        return None

    try:
        return imgurClient.get_image(imageId).link
    except Exception as e:
        errorMessage = ('Failed to convert imgur to image link: '
                        '[ERROR] Exception: Url {} raised exception:\n\t {}'.format(url, e))
        logger.log(errorMessage)
        LikedSavedDatabase.db.addUnsupportedSubmission(submission, errorMessage)
        return None


def isImgurAlbumUrl(url):
    """Return whether url is an imgur album link.

    True when the URL mentions imgur, imageSaver.getFileTypeFromUrl() reports
    no file type for it, and it contains an '/a/' album segment.
    """
    return ('imgur' in url.lower()
            and not imageSaver.getFileTypeFromUrl(url)
            and '/a/' in url)


def cleanImgurAlbumUrl(url):
    """Strip a trailing anchor from an imgur album URL (e.g. /a/erere#0).

    Everything from the last '#' onwards is removed; URLs without '#' are
    returned unchanged.
    """
    anchor = url.rfind('#')
    if anchor > -1:
        return url[:anchor]
    return url


def checkImgurAPICredits(imgurClient):
    """Log the client's remaining API credits and return whether any are usable.

    Reads imgurClient.credits['UserRemaining'] and
    imgurClient.credits['ClientRemaining'].

    Returns False when the user has no credits left, or when the shared client
    has fewer than 1000 credits remaining; True otherwise.
    """
    userRemaining = imgurClient.credits['UserRemaining']
    clientRemaining = imgurClient.credits['ClientRemaining']

    logger.log('Imgur API Credit Report:\n'
               + '\tUserRemaining: ' + str(userRemaining)
               + '\n\tClientRemaining: ' + str(clientRemaining))

    if not userRemaining:
        logger.log('You have used up all of your Imgur API credits! Please wait an hour')
        return False

    # Keep a reserve so a single user cannot drain the shared client's
    # credits for everyone else
    if clientRemaining < 1000:
        logger.log('RedditLikedSavedImageDownloader Imgur Client is running low on Imgur API credits!\n'
                   'Unfortunately, this means no one can download any Imgur albums until the end of the month.\n'
                   'If you are really jonesing for access, authorize your own Imgur Client and fill in'
                   ' its details in settings.txt.')
        return False

    return True


class ImgurAuth:
    """Plain holder for the Imgur API client ID and client secret."""

    def __init__(self, clientId, clientSecret):
        self.clientId = clientId
        self.clientSecret = clientSecret


def getImgurAuth():
    """Build an ImgurAuth from the settings module.

    Returns None (after logging an explanation) when
    settings.hasImgurSettings() is false.
    """
    if settings.hasImgurSettings():
        return ImgurAuth(settings.settings['Imgur_client_id'],
                         settings.settings['Imgur_client_secret'])

    logger.log('No Imgur Client ID and/or Imgur Client Secret was provided, or album download is not'
               ' enabled. This is required to download imgur albums. They will be ignored. Check'
               ' settings.txt for how to fill in these values.')
    return None


def _registerExistingAlbumFiles(saveAlbumPath, albumSubmission):
    """Associate every file already under saveAlbumPath with albumSubmission.

    Returns True when at least one file was found.
    """
    filesFound = False
    for root, _dirs, files in os.walk(saveAlbumPath):
        for fileName in files:
            databasePath = utilities.outputPathToDatabasePath(os.path.join(root, fileName))
            print("Success {} on {}".format(databasePath, albumSubmission))
            LikedSavedDatabase.db.onSuccessfulSubmissionDownload(albumSubmission, databasePath)
            filesFound = True
    return filesFound


def _requestAlbumImages(imgurClient, albumUrl, albumSubmission):
    """Ask the Imgur API for the images behind albumUrl.

    Returns an empty list, after recording albumSubmission as unsupported,
    when no ID can be parsed from albumUrl or the API request raises.
    """
    albumId = imgurIdFromUrl(albumUrl)
    if not albumId:
        LikedSavedDatabase.db.addUnsupportedSubmission(albumSubmission,
                                                       "Imgur album ID could not be found")
        return []

    try:
        if '/a/' in albumUrl:
            return imgurClient.get_album_images(albumId)
        # Not an album: treat the single image as a one-element album
        return [imgurClient.get_image(albumId)]
    except Exception as e:
        logger.log('Imgur album url ' + albumUrl + ' could not be retrieved!'
                   + '\n\t{}'.format(e))
        LikedSavedDatabase.db.addUnsupportedSubmission(albumSubmission,
                                                       "Imgur album hit exception")
        return []


def saveAllImgurAlbums(outputDir, imgurAuth, subredditAlbums, soft_retrieve_imgs = True):
    """Download every imgur album in subredditAlbums into outputDir.

    subredditAlbums maps a subreddit directory name to a sequence of albums;
    each album is indexed as [0] submission, [1] title, [2] url.

    When soft_retrieve_imgs is true nothing is created on disk and the album
    API is not queried, so no images are downloaded.

    Albums whose folder already exists and contains files are skipped (albums
    are never updated); their existing files are registered with the database.

    Returns the number of albums whose images were processed, or 0 when the
    API credit check fails.
    """
    numSavedAlbumsTotal = 0

    # Login to imgur
    imgurClient = imgur.ImgurClient(imgurAuth.clientId, imgurAuth.clientSecret)

    if not checkImgurAPICredits(imgurClient):
        return 0

    if not soft_retrieve_imgs:
        utilities.makeDirIfNonexistant(outputDir)

    numSubreddits = len(subredditAlbums)
    for subredditIndex, (subredditDir, albums) in enumerate(subredditAlbums.items()):
        logger.log('[' + imageSaver.percentageComplete(subredditIndex, numSubreddits) + '] '
                   + subredditDir)

        if not soft_retrieve_imgs:
            # Make directory for subreddit
            utilities.makeDirIfNonexistant(outputDir + '/' + subredditDir)

        numAlbums = len(albums)
        for albumIndex, album in enumerate(albums):
            albumSubmission = album[0]
            albumTitle = album[1]
            albumUrl = cleanImgurAlbumUrl(album[2])
            logger.log('\t[' + imageSaver.percentageComplete(albumIndex, numAlbums) + '] '
                       + '\t' + albumTitle + ' (' + albumUrl + ')')

            # Example path:
            # output/aww/Cute Kittens_802984323
            # output/subreddit/Submission Title_urlCRC
            # The CRC is used so that if we are saving two albums with the same
            # post title (e.g. 'me_irl') we get unique folder names because the URL is different
            saveAlbumPath = (outputDir + u'/' + subredditDir + u'/'
                             + imageSaver.safeFileName(albumTitle) + u'_'
                             + str(signedCrc32(albumUrl.encode())))

            # If we already saved the album, skip it
            # Note that this means existing albums will not be updated
            if os.path.isdir(saveAlbumPath):
                # In case this is a legacy album (before database file associations), add the
                # folder's contents. This should only really happen if the user is purposefully
                # downloading legacy stuff (because e.g. the script got updated)
                if _registerExistingAlbumFiles(saveAlbumPath, albumSubmission):
                    logger.log('\t\t[already saved] ' + 'Skipping album ' + albumTitle
                               + ' (note that this script will NOT update albums)')
                    continue

            albumImages = []
            if not soft_retrieve_imgs:
                # Make directory for album
                utilities.makeDirIfNonexistant(saveAlbumPath)
                # Don't talk to the API for soft retrieval (we don't want to waste our credits)
                albumImages = _requestAlbumImages(imgurClient, albumUrl, albumSubmission)

            if not albumImages:
                continue

            numImages = len(albumImages)
            for imageIndex, image in enumerate(albumImages):
                imageUrl = image.link
                fileType = imageSaver.getFileTypeFromUrl(imageUrl)
                saveFilePath = saveAlbumPath + u'/' + str(imageIndex) + '.' + fileType

                if not soft_retrieve_imgs:
                    # Retrieve the image and save it
                    urlretrieve(imageUrl, saveFilePath)
                    LikedSavedDatabase.db.onSuccessfulSubmissionDownload(
                        albumSubmission, utilities.outputPathToDatabasePath(saveFilePath))

                logger.log('\t\t[' + imageSaver.percentageComplete(imageIndex, numImages) + '] '
                           + ' [save] ' + imageUrl + ' saved to "' + saveAlbumPath + '"')

            numSavedAlbumsTotal += 1

    return numSavedAlbumsTotal