# Python source code of instagram

import getpass
import json
import lzma
import os
import platform
import re
import shutil
import string
import sys
import tempfile
from contextlib import contextmanager, suppress
from datetime import datetime, timezone
from functools import wraps
from hashlib import md5
from io import BytesIO
from itertools import islice
from pathlib import Path
from typing import Any, Callable, Iterator, List, Optional, Set, Tuple, Union

import requests
import urllib3  # type: ignore
from instaloader import Instaloader
from instaloader.exceptions import *
from instaloader.instaloader import format_string_contains_key
from instaloader.instaloadercontext import InstaloaderContext
from instaloader.structures import (Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
						 save_structure_to_file, load_structure_from_file)


class ProfileURL(Profile):
	"""Profile whose post iterator can be restricted to a window of posts."""

	def get_posts(self, limit: Optional[int] = None, offset: Optional[int] = 0) -> Iterator[Post]:
		"""Retrieve posts from a profile, optionally restricted to a window.

		:param limit: Maximum number of posts to yield. ``None``, ``0`` or a negative
		   value means "no limit".
		:param offset: Number of leading posts to skip. ``None`` or a negative value is
		   treated as ``0``. The offset is honoured whether or not a limit is given.
		:return: Iterator over the selected posts, in the order the node list yields them.
		"""
		self._obtain_metadata()
		nodes = self._context.graphql_node_list("472f257a40c653c64c666ce877d59d2b",
												{'id': self.userid},
												'https://www.instagram.com/{0}/'.format(self.username),
												lambda d: d['data']['user']['edge_owner_to_timeline_media'],
												self._rhx_gis,
												self._metadata('edge_owner_to_timeline_media'))
		start = max(offset or 0, 0)
		stop = start + limit if limit and limit > 0 else None
		# islice stops pulling nodes once the window is exhausted, so posts are
		# produced lazily and nothing past the window is requested.
		yield from (Post(self._context, node, self) for node in islice(nodes, start, stop))

class InstaloaderURL(Instaloader):

	def check_profile_id(self, profile_name: str) -> Profile:
		"""
		Consult locally stored ID of profile with given name, check whether ID matches and whether name
		has changed, and return the current profile. The ID is stored when no usable ID file exists yet.

		When the stored ID does not belong to ``profile_name`` any more (or the name no longer exists),
		the profile is looked up by ID and the local directory / ID file is renamed to the new name.

		:param profile_name: Profile name
		:return: Instance of current profile (a :class:`ProfileURL`)
		:raises ProfileNotExistsException: if the name does not exist and no stored ID could be used
		"""
		profile = None
		with suppress(ProfileNotExistsException):
			profile = ProfileURL.from_username(self.context, profile_name)
		profile_exists = profile is not None
		id_filename = self._get_id_filename(profile_name)
		try:
			with open(id_filename, 'rb') as id_file:
				profile_id = int(id_file.read())
			if (not profile_exists) or (profile_id != profile.userid):
				if profile_exists:
					self.context.log("Profile {0} does not match the stored unique ID {1}.".format(profile_name,
																								   profile_id))
				else:
					self.context.log("Trying to find profile {0} using its unique ID {1}.".format(profile_name,
																								  profile_id))
				# Build a ProfileURL (not a plain Profile) so the returned object accepts the
				# limit/offset arguments that profile_posts_urls passes to get_posts().
				profile_from_id = ProfileURL.from_id(self.context, profile_id)
				newname = profile_from_id.username
				self.context.log("Profile {0} has changed its name to {1}.".format(profile_name, newname))
				if (format_string_contains_key(self.dirname_pattern, 'profile') or
						format_string_contains_key(self.dirname_pattern, 'target')):
					# The directory name depends on the profile name: rename the whole directory.
					os.rename(self.dirname_pattern.format(profile=profile_name.lower(),
														  target=profile_name.lower()),
							  self.dirname_pattern.format(profile=newname.lower(),
														  target=newname.lower()))
				else:
					# Shared directory: only the per-profile ID file carries the name.
					os.rename('{0}/{1}_id'.format(self.dirname_pattern.format(), profile_name.lower()),
							  '{0}/{1}_id'.format(self.dirname_pattern.format(), newname.lower()))
				return profile_from_id
			return profile
		except (FileNotFoundError, ValueError):
			# No ID file yet, or its content is not an integer: fall through and (re)create it.
			pass
		if profile_exists:
			self.save_profile_id(profile)
			return profile
		raise ProfileNotExistsException("Profile {0} does not exist.".format(profile_name))

	def profile_posts_urls(self, profile_name: Union[str, Profile],
						 profile_pic: bool = False, profile_pic_only: bool = False,
						 fast_update: bool = False,
						 download_stories: bool = False, download_stories_only: bool = False,
						 download_tagged: bool = False, download_tagged_only: bool = False,
						 post_filter: Optional[Callable[[Post], bool]] = None,
						 storyitem_filter: Optional[Callable[[StoryItem], bool]] = None,
						 limit: Optional[int] = None,
						 offset: Optional[int] = 0) -> List[str]:
		"""Collect the picture URLs of a profile's posts instead of downloading them.

		The profile picture, stories and tagged posts are never fetched by this method; the
		corresponding flags are accepted so the signature mirrors the download variant, and the
		``*_only`` flags make the method return before any post is inspected.

		:param profile_name: Profile name, or an already resolved profile object.
		:param profile_pic: Accepted for signature compatibility; no effect.
		:param profile_pic_only: Return an empty list right after the profile checks.
		:param fast_update: Stop as soon as :meth:`url_post` reports that nothing was retrieved.
		:param download_stories: Only triggers the "has no stories" log message.
		:param download_stories_only: Return an empty list after the story check.
		:param download_tagged: Accepted for signature compatibility; no effect.
		:param download_tagged_only: Return an empty list before iterating over posts.
		:param post_filter: Predicate; posts for which it returns False are skipped.
		:param storyitem_filter: Accepted for signature compatibility; no effect.
		:param limit: Forwarded to ``profile.get_posts``.
		:param offset: Forwarded to ``profile.get_posts``.
		:return: List of picture URLs; always a list, possibly empty.
		:raises ProfileNotExistsException: if a logged-in viewer is blocked by a public profile.
		:raises LoginRequiredException: if the profile is private and no user is logged in.
		:raises PrivateProfileNotFollowedException: if the profile is private and not followed.
		"""
		# Check if profile does exist or name has changed since the last run,
		# and update name and stored ID if necessary.
		if isinstance(profile_name, str):
			profile = self.check_profile_id(profile_name.lower())
		else:
			profile = profile_name

		profile_name = profile.username

		url_pics = []  # type: List[str]

		# Save metadata as JSON if desired.
		if self.save_metadata is not False:
			json_filename = '{0}/{1}_{2}'.format(self.dirname_pattern.format(profile=profile_name, target=profile_name),
												 profile_name, profile.userid)
			self.save_metadata_json(json_filename, profile)

		if self.context.is_logged_in and profile.has_blocked_viewer and not profile.is_private:
			# raising ProfileNotExistsException invokes "trying again anonymously" logic
			raise ProfileNotExistsException("Profile {} has blocked you".format(profile_name))

		# The profile picture itself is not fetched here; only the early exit remains.
		if profile_pic_only:
			return url_pics

		# Catch some errors
		if profile.is_private:
			if not self.context.is_logged_in:
				raise LoginRequiredException("profile %s requires login" % profile_name)
			if not profile.followed_by_viewer and \
					self.context.username != profile.username:
				raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % profile_name)
		else:
			if self.context.is_logged_in and not (download_stories or download_stories_only):
				self.context.log("profile %s could also be downloaded anonymously." % profile_name)

		# Stories are not fetched; only report when there are none.
		if download_stories or download_stories_only:
			if not profile.has_viewable_story:
				self.context.log("{} does not have any stories.".format(profile_name))
		if download_stories_only:
			return url_pics

		# Tagged posts are not fetched either.
		if download_tagged_only:
			return url_pics

		# Iterate over posts and collect their picture URLs
		self.context.log("Retrieving posts from profile {}.".format(profile_name))
		totalcount = profile.mediacount
		for count, post in enumerate(profile.get_posts(limit=limit, offset=offset), start=1):
			self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
			if post_filter is not None and not post_filter(post):
				self.context.log('<skipped>')
				continue
			with self.context.error_catcher('Download URL profile {}'.format(profile_name)):
				downloaded, url_pics_post = self.url_post(post, target=profile_name)
				url_pics.extend(url_pics_post)

				if fast_update and not downloaded:
					break

		return url_pics

	def url_post(self, post: Post, target: Union[str, Path]) -> Tuple[bool, List[str]]:
		"""
		Get the picture URL(s) of one instagram post node. Nothing is downloaded or written to disk.

		Videos, captions, geotags, comments and per-post metadata are not handled by this URL-only
		variant, whatever the corresponding ``download_*`` / ``save_metadata`` options are set to.

		:param post: Post to get URL(s) of.
		:param target: Target name, i.e. profile name, #hashtag, :feed. Not used here; kept so the
		   signature matches the caller.
		:return: Tuple ``(downloaded, urls)``. ``downloaded`` is always True, so a caller's
		   ``fast_update`` check never stops early; ``urls`` lists the picture URLs of the post
		   (video thumbnails only when ``download_video_thumbnails`` is True).
		"""
		url_pics = []
		if self.download_pictures:
			if post.typename == 'GraphSidecar':
				for sidecar_node in post.get_sidecar_nodes():
					# Picture, or video thumbnail if thumbnails are wanted
					if not sidecar_node.is_video or self.download_video_thumbnails is True:
						url_pics.append(sidecar_node.display_url)
			elif post.typename == 'GraphImage':
				url_pics.append(post.url)
			elif post.typename == 'GraphVideo':
				# Same rule as for sidecar videos: the thumbnail counts only when asked for.
				if self.download_video_thumbnails is True:
					url_pics.append(post.url)
			else:
				self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename))

		# Terminates the progress line started by the caller.
		self.context.log()
		return True, url_pics


def viewImageFromURL(url_pic):
	"""Download the image at ``url_pic`` and show it in a matplotlib window.

	:param url_pic: URL of the image to display.
	"""
	import matplotlib.pyplot as plt

	# Reuse the shared download/convert helper instead of duplicating it here;
	# only the RGB copy is needed for display.
	_, draw = getImageFromURL(url_pic, draw=True)

	plt.figure(figsize=(15, 15))
	plt.axis('off')
	plt.imshow(draw)
	plt.show()


def getImageFromURL(url_pic, draw=False, timeout=30):
	"""Download an image and return it, optionally together with a copy for display.

	The image is the one that will be processed, the draw is the one to be shown.

	:param url_pic: URL of the image.
	:param draw: When true, also return a copy converted from BGR to RGB.
	:param timeout: Seconds ``requests.get`` waits for the server before giving up.
	:return: ``image`` alone, or the tuple ``(image, rgb_copy)`` when ``draw`` is true.
	:raises requests.RequestException: on connection errors, timeouts or an HTTP error status.
	"""
	import cv2
	from keras_retinanet.utils.image import read_image_bgr

	r = requests.get(url_pic, allow_redirects=True, timeout=timeout)
	# Fail with a clear HTTP error here instead of trying to decode an error page as an image.
	r.raise_for_status()
	image = read_image_bgr(BytesIO(r.content))

	if not draw:
		return image

	# copy to draw on
	display_copy = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)
	return image, display_copy



def getImageFromFilePath(img_path, draw=False):
	"""Read an image from disk, optionally together with a copy for display.

	The image is the one that will be processed, the draw is the one to be shown.

	:param img_path: Path handed to ``read_image_bgr``.
	:param draw: When true, also return a copy converted from BGR to RGB.
	:return: ``image`` alone, or the tuple ``(image, rgb_copy)`` when ``draw`` is true.
	"""
	import cv2
	from keras_retinanet.utils.image import read_image_bgr

	image = read_image_bgr(img_path)
	if not draw:
		return image

	# copy to draw on
	display_copy = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)
	return image, display_copy



def _remove_files_in(folder):
	"""Delete every regular file directly inside ``folder`` (sub-folders are kept)."""
	if not os.path.exists(folder):
		return
	for the_file in os.listdir(folder):
		file_path = os.path.join(folder, the_file)
		try:
			if os.path.isfile(file_path):
				os.unlink(file_path)
		except OSError as e:
			# Best effort: report the problem and carry on with the next file.
			print(e)


def _echo(log_file, *values, **print_kwargs):
	"""Print ``values`` to stdout and write the same text to ``log_file``."""
	print(*values, **print_kwargs)
	print(*values, file=log_file, **print_kwargs)


def _safe_ratio(numerator, denominator):
	"""Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
	return numerator / denominator if denominator else 0.0


def _ask_view_mode(choice):
	"""Return 'i' or 's', asking on stdin until one of the two is given."""
	if not choice:
		choice = ''
	while choice not in ['i', 's']:
		choice = input('Do you want to see the images processed or to see some stats? Type \'i\' for image, \'s\' for stats: ')
	return choice


def _ask_slice_bound(prompt):
	"""Ask for an optional integer slice bound; an empty answer means None."""
	while True:
		answer = input(prompt)
		if answer == '':
			return None
		try:
			return int(answer)
		except ValueError:
			print('Please type a whole number, or just press enter to leave it open.')


def _print_label_stats(url_pics, url_pics_person, labels_to_names, labels_images, log_file):
	"""Print and log the label statistics; return ``{label name: number of instances}``."""
	_echo(log_file, 'I will now show you all the stats I can think of:')
	_echo(log_file, 'Total images:', len(url_pics), 'Images with one person: ', len(url_pics_person))
	_echo(log_file, round(_safe_ratio(len(url_pics_person), len(url_pics)) * 100, 1),
		'%  of the images contain only one main subject (probably the account owner)')

	# how many of each label
	len_labels = {labels_to_names[label_index]: 0 for label_index in labels_to_names}
	for pic_index in labels_images:
		for label in labels_images[pic_index]:
			len_labels[label] += len(labels_images[pic_index][label])
	sum_len_labels = sum(len_labels.values())
	_echo(log_file, 'There are ', sum_len_labels, ' total instances of labels')
	_echo(log_file, f'Instances of labels per image: {_safe_ratio(sum_len_labels, len(url_pics_person)):4.2f}')
	for label in len_labels:
		perc_label = _safe_ratio(len_labels[label], sum_len_labels)
		avg_label = _safe_ratio(len_labels[label], len(labels_images))
		# default=0 keeps this working when no image was annotated at all
		max_label = max((len(labels_images[pic_index][label]) for pic_index in labels_images), default=0)
		_echo(log_file, f'Label: {label:15} Perc: {perc_label:>6.1%} | Per Image: Avg: {avg_label:>4.2f} Max: {max_label}')
	return len_labels


def _browse_label_segments(labels_images, len_labels):
	"""Let the user pick labels and open the saved segment images of each one."""
	from PIL import Image

	while True:
		label = ' '
		while label not in len_labels and label != '':
			label = input('Insert a label to see its instances!\nUseful if you want to see which shoes your favourite instagram user has.\nYou can see the labels above. Press enter to abort: ')
		if label == '':
			return
		segments = [img_ann['segment'] for pic_index in labels_images
										for img_ann in labels_images[pic_index][label]]
		if not segments:
			print('There are no segments to show for this label.\n')
			continue
		print('There are ', len(segments), ' results. Tell me the start and the end, as if you were slicing a Python array')
		print('You can also find the results in the folder:\n' + "/".join(segments[0].split("/")[:-1]))
		from_i = _ask_slice_bound('Start: ')
		to_i = _ask_slice_bound('End: ')

		for segment_path in segments[from_i:to_i]:
			img = Image.open(segment_path)
			img.show(title=segment_path.split('/')[-1])
			del img


def instagramImpl(profile, limit=None, offset=0, process_images=True, profile_stats=True, choice=None, restore_result=False):
	"""Fetch the post images of an Instagram profile and view, process or summarise them.

	The data folder is read from ``~/.maskrcnn-modanet/savedvars.json`` (key ``datapath``); all
	output goes below ``<datapath>results/instagram/<profile>/`` and a timestamped ``.txt`` log
	is written there. The log file is closed even when the run fails.

	:param profile: Profile name; also used to build the output paths.
	:param limit: Forwarded to ``InstaloaderURL.profile_posts_urls``.
	:param offset: Forwarded to ``InstaloaderURL.profile_posts_urls``.
	:param process_images: When false, every image is only displayed.
	:param profile_stats: With ``process_images``: first keep the images in which the 'coco'
	   model finds exactly one person covering at least 10% of the image, then annotate those
	   with the 'default' model. Without it, each image URL is handed to ``processimages.main``.
	:param choice: ``'i'`` to show the annotated images, ``'s'`` to save them and print
	   statistics; anything else makes the function ask on stdin.
	:param restore_result: Load ``results.json`` / ``labels_images.json`` saved by an earlier
	   ``'s'`` run instead of contacting Instagram.
	"""
	from maskrcnn_modanet.instagram_impl import InstaloaderURL
	import matplotlib.pyplot as plt

	import time

	import numpy as np

	from maskrcnn_modanet.cli import validators

	from PIL import Image

	with open(os.path.expanduser('~')+ '/.maskrcnn-modanet/' + 'savedvars.json') as f:
		savedvars = json.load(f)
	path = savedvars['datapath']

	timestr = time.strftime("%Y%m%d-%H%M%S")

	profile_path = path + 'results/instagram/'+ profile + '/'
	save_images_path = profile_path + 'images/'
	save_segments_path = profile_path + 'segments/'
	log_path = profile_path + timestr + '.txt'

	log_file = None
	try:
		if not restore_result:
			instaloader = InstaloaderURL(dirname_pattern=path+'/results/instagram/{target}',download_pictures=True, download_videos=False, download_video_thumbnails=False,
									 download_geotags=False,
									 download_comments=False, save_metadata=False,
								)

			profile = instaloader.check_profile_id(profile)

			log_file = open(log_path, 'w+')

			# start from an empty images folder
			_remove_files_in(save_images_path)
			save_images_path = validators.check_if_folder_exists(None, None, save_images_path)

			print(profile)

			url_pics = instaloader.profile_posts_urls(profile, limit=limit, offset=offset)

			if process_images and not profile_stats:
				from maskrcnn_modanet.processimages import loadModel, main

				model, labels_to_names = loadModel(model_type='default')
			elif process_images and profile_stats:
				from maskrcnn_modanet.processimages import apply_mask, loadModel

				model, labels_to_names = loadModel(model_type='coco')

				print('Now looking for images with only one person in the image..')

			url_pics_person = []

			pic_index = 0
			for url_pic in url_pics:
				if not process_images:
					viewImageFromURL(url_pic)
				elif process_images and not profile_stats:
					main(proc_img_path=None, proc_img_url=url_pic, all_set=False, save_images_path=None, model_path=None,
						segments=False, annotations=False, threshold_score=0.5, limit=None, model=model, labels_to_names=labels_to_names)
				elif process_images and profile_stats:
					_echo(log_file, pic_index, end=' ')
					print(url_pic, file=log_file)

					try:
						image, draw = getImageFromURL(url_pic, draw=True)
					except Exception:
						_echo(log_file, 'Image ', pic_index, 'failed to download. Url tried below:\n' + url_pic + '\n\n Continuing to next image..')
						continue

					image_area = len(image) * len(image[0])

					pic_index += 1

					img_anns = apply_mask(model, image, draw=draw, labels_to_names=labels_to_names, image_segments=False)

					one_person = 0
					for img_ann in img_anns:
						if img_ann['category'] == 'person' and img_ann['bbox'][2] * img_ann['bbox'][3] >= 0.1 * image_area:
							one_person += 1
							print(one_person, 'person that covers ', round(img_ann['bbox'][2] * img_ann['bbox'][3] / image_area * 100),'%  of the image found in this photo', file=log_file)
						if one_person > 1:
							# we only select images with only one person that covers an area greater than 10% of the image
							print('Too many people found in this photo', file=log_file)
							one_person = 0
							break

					if one_person:
						# add the pic to the new urls
						url_pics_person.append([pic_index, url_pic])

			if process_images and profile_stats:
				print('We\'ve now selected the images that are probably the ones with only the person who owns this account.')
				choice = _ask_view_mode(choice)

				# now let's switch to ModaNet and look into the image
				model, labels_to_names = loadModel(model_type='default')

				for label_index in labels_to_names:
					segment_label_path = save_segments_path + labels_to_names[label_index] + '/'
					# remove all previous segments saved
					_remove_files_in(segment_label_path)
					validators.check_if_folder_exists(None, None, segment_label_path)

				labels_images = {}

				for pic_index, url_pic in url_pics_person:
					image, draw = getImageFromURL(url_pic, draw=True)

					_echo(log_file, pic_index, end=' ')
					print(url_pic, file=log_file)

					if choice == 'i':
						img_anns = apply_mask(model, image, draw=draw, labels_to_names=labels_to_names, image_segments=False)
						# show the image
						plt.figure(figsize=(15, 15), num=str(pic_index))
						plt.axis('off')
						plt.imshow(draw)

						plt.show()
					elif choice == 's':
						img_anns = apply_mask(model, image, draw=draw, labels_to_names=labels_to_names, image_segments=True)

						# save the images for easy retrieval
						processed_image = Image.fromarray(draw, 'RGB')
						processed_image.save(save_images_path + str(pic_index) + '.png')
						del processed_image

						# let's count
						labels_images[pic_index] = {}
						for label_index in labels_to_names:
							labels_images[pic_index][labels_to_names[label_index]] = []

						for img_ann in img_anns:
							save_segment_path = save_segments_path + img_ann['category'] + '/' + str(pic_index) + '_.png'
							# find a file name that is still free: <pic>_.png, <pic>_1.png, <pic>_2.png, ...
							segment_counter = 1
							while os.path.isfile(save_segment_path):
								save_segment_path = "_".join(save_segment_path.split("_")[:-1]) + "_" + str(segment_counter) + '.png'
								segment_counter += 1
							segment = Image.fromarray(img_ann.pop('segment'), 'RGB')
							segment.save(save_segment_path)
							del segment
							# keep the path instead of the pixel data in the annotation
							img_ann['segment'] = save_segment_path
							labels_images[pic_index][img_ann['category']].append(img_ann)

				if choice == 's':
					results = {
						'url_pics': url_pics,
						'url_pics_person': url_pics_person,
						'labels_to_names': labels_to_names
					}

					print('Saving annotations results for easy recovery.. Use -r option later')
					with open(profile_path + 'results.json', 'w') as outfile:
						json.dump(results, outfile)
					print('Now saving annotations..')
					with open(profile_path + 'labels_images.json', 'wb') as outfile:
						np.save(outfile, labels_images)

		else:
			log_file = open(log_path, 'w+')

			print('Restoring results..')
			with open(profile_path + 'results.json') as f:
				results = json.load(f)
			print('Restoring annotations..')
			# NOTE: allow_pickle=True unpickles arbitrary objects; only restore files written by this tool.
			with open(profile_path + 'labels_images.json', 'rb') as f:
				labels_images = np.load(f, allow_pickle=True)[()]
				# credit goes to https://stackoverflow.com/questions/30811918/saving-dictionary-of-numpy-arrays

			url_pics = results['url_pics']
			url_pics_person = results['url_pics_person']
			labels_to_names = results['labels_to_names']

			print('We\'ve now recovered the images that are probably the ones with only the person who owns this account, processed to look for apparel and clothing items.')
			choice = _ask_view_mode(choice)

		if process_images and profile_stats:
			if choice == 'i' and restore_result:
				for pic_index, url_pic in url_pics_person:
					image, draw = getImageFromFilePath(save_images_path + str(pic_index) + '.png', draw=True)
					plt.figure(figsize=(9, 9), num=str(pic_index))
					plt.axis('off')
					plt.imshow(draw)
					plt.show()

			elif choice == 's':
				len_labels = _print_label_stats(url_pics, url_pics_person, labels_to_names, labels_images, log_file)
				_browse_label_segments(labels_images, len_labels)

		print('You can find the logs as a txt file in:\n' + log_path)
	finally:
		# Close the log even when something above raised.
		if log_file is not None:
			log_file.close()