from vergeml.utils import download_files
from vergeml.utils import Error
import shutil
import os.path
import zipfile
import tarfile


# download(env) fetches the two SVHN .mat files and copies them into the
# project's samples directory.
#
# env: object exposing get(key); this function reads 'base.samples_dir'
#      (copy destination) and 'base.cache_dir' (passed to download_files
#      as the download location).
#
# NOTE(review): the docstring body is kept flush-left on the assumption that
# it is shown verbatim as the dataset description - confirm against the
# caller before re-indenting or rewording it.
def download(env):
    """The Street View House Numbers (SVHN) Dataset.

SVHN is a real-world image dataset for developing machine learning and
object recognition algorithms with minimal requirement on data
preprocessing and formatting. It can be seen as similar in flavor to
MNIST (e.g., the images are of small cropped digits), but incorporates
an order of magnitude more labeled data (over 600,000 digit images) and
comes from a significantly harder, unsolved, real world problem
(recognizing digits and numbers in natural scene images). SVHN is
obtained from house numbers in Google Street View images.

Authors:
Yuval Netzer, Tao Wang, Adam Coates, Alessandro Bissacco, Bo Wu, Andrew Y. Ng
Reading Digits in Natural Images with Unsupervised Feature Learning
NIPS Workshop on Deep Learning and Unsupervised Feature Learning 2011.
http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf

For more information visit:
http://ufldl.stanford.edu/housenumbers/"""

    urls = ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
            "http://ufldl.stanford.edu/housenumbers/test_32x32.mat"]

    samples_dir = env.get('base.samples_dir')

    print("Downloading SVHN to {}.".format(samples_dir))

    # download_files returns the directory that holds the fetched files.
    src_dir = download_files(urls, dir=env.get('base.cache_dir'))

    try:
        # shutil.copy treats a non-existent destination as a *file* name, so
        # without this both .mat files would be written to a single file
        # called `samples_dir`, the second overwriting the first.
        os.makedirs(samples_dir, exist_ok=True)

        for filename in ("train_32x32.mat", "test_32x32.mat"):
            shutil.copy(os.path.join(src_dir, filename), samples_dir)
    finally:
        # Always drop the downloaded copies, even when copying fails, so a
        # failed run does not leave them behind in the cache directory.
        shutil.rmtree(src_dir)

    print("Finished downloading SVHN.")


# Summary metadata attached to the download function.
download.__info__ = [
    ('Samples', '73K'),
    ('Test Samples', '26K'),
    ('Type', 'Labeled Images'),
    ('Resolution', '32x32 rgb'),
    ('Size', '235.3 MB')
]