import logging
import os

from django.core.files.uploadedfile import UploadedFile, InMemoryUploadedFile
from rest_framework import status
from rest_framework.exceptions import ParseError, MethodNotAllowed
from rest_framework.response import Response

from django_drf_filepond.models import TemporaryUpload, storage,\
    TemporaryUploadChunked
from io import BytesIO, StringIO
from django_drf_filepond.utils import _get_user
from six import text_type, binary_type

# There's no built in FileNotFoundError in Python 2
try:
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

LOG = logging.getLogger(__name__)


class FilepondFileUploader(object):
    """Base class for the upload handlers and factory for selecting one."""

    @classmethod
    def get_uploader(cls, request):
        """Return the uploader instance that should handle ``request``.

        PATCH and HEAD requests always relate to a chunked upload. A POST
        request starts a chunked upload when the upload field contains the
        string ``'{}'`` and an ``Upload-Length`` header is present; any other
        POST is treated as a standard upload.

        Raises:
            MethodNotAllowed: the request method is not PATCH, HEAD or POST.
            ParseError: (POST only) the upload field is missing from the
                request data.
        """
        if request.method in ('PATCH', 'HEAD'):
            return FilepondChunkedFileUploader()

        if request.method != 'POST':
            # NOTE(review): MethodNotAllowed's first positional argument
            # appears to be the method name rather than the detail message;
            # left unchanged to keep the existing response text - confirm.
            raise MethodNotAllowed('%s is an invalid method type'
                                   % (request.method))

        file_obj = cls._get_file_obj(request)
        if (file_obj == '{}' and
                request.META.get('HTTP_UPLOAD_LENGTH', None)):
            LOG.debug('Returning CHUNKED uploader to handle '
                      'upload request... ')
            return FilepondChunkedFileUploader()

        # If we didn't identify the need for a chunked uploader in any of the
        # above tests, treat this as a standard upload
        LOG.debug('Returning STANDARD uploader to handle upload request... ')
        return FilepondStandardFileUploader()

    @classmethod
    def _get_file_obj(cls, request):
        """Return the value of the upload field from ``request.data``.

        Raises:
            ParseError: the upload field is not present in the request data.
        """
        # By default the upload element name is expected to be "filepond"
        # As raised in issue #4, there are cases where there may be more
        # than one filepond instance on a page, or the developer has opted
        # not to use the name "filepond" for the filepond instance.
        # Using the example from #4, this provides support for these cases.
        upload_field_name = 'filepond'
        if 'fp_upload_field' in request.data:
            upload_field_name = request.data['fp_upload_field']

        if upload_field_name not in request.data:
            raise ParseError('Invalid request data has been provided.')

        return request.data[upload_field_name]

    # The file ID and upload ID are generated by _get_file_id in
    # django_drf_filepond.views. The TemporaryUpload model should validate
    # that the values provided are within spec but in some cases, e.g. when
    # using SQLite, that doesn't happen. We therefore provide these two
    # methods for doing local validation of these values since they are
    # passed in as parameters to handle_upload.
    @classmethod
    def _file_id_valid(cls, file_id):
        """Return True if ``file_id`` is a text string of 22 characters."""
        return isinstance(file_id, text_type) and (len(file_id) == 22)

    @classmethod
    def _upload_id_valid(cls, upload_id):
        """Return True if ``upload_id`` is a text string of 22 characters."""
        return isinstance(upload_id, text_type) and (len(upload_id) == 22)

    @classmethod
    def _parse_non_negative_int(cls, value):
        """Parse a client-supplied header value as a non-negative integer.

        Returns the parsed integer, or None when ``value`` cannot be
        converted by ``int()`` or is negative.
        """
        try:
            parsed = int(value)
        except (TypeError, ValueError):
            return None
        if parsed < 0:
            return None
        return parsed


class FilepondStandardFileUploader(FilepondFileUploader):
    """Handles a standard upload where the file arrives in one request."""

    def handle_upload(self, request, upload_id, file_id):
        """Store the uploaded file as a TemporaryUpload.

        Returns a plain-text Response containing ``upload_id`` with status
        200 on success, or a 500 Response if either ID fails validation.

        Raises:
            ParseError: the upload field is missing or is not an
                UploadedFile instance.
        """
        # Since the upload_id and file_id are being provided here as
        # parameters, we check that they are valid. This should be done by
        # the DB and an error would be generated in the tu.save() call below
        # however SQLite doesn't handle field length validation so this won't
        # be picked up when using SQLite.
        if ((not self._file_id_valid(file_id)) or
                (not self._upload_id_valid(upload_id))):
            return Response('Invalid ID for handling upload.',
                            content_type='text/plain',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        file_obj = self._get_file_obj(request)

        # The type of parsed data should be a descendant of an UploadedFile
        # type.
        if not isinstance(file_obj, UploadedFile):
            raise ParseError('Invalid data type has been parsed.')

        # Save original file name and set name of saved file to the unique ID
        upload_filename = file_obj.name
        file_obj.name = file_id

        # It's not necessary to explicitly create the upload directory
        # before saving since the FileSystemStorage object creates the
        # directory on save.
        LOG.debug('About to store uploaded temp file with filename: %s',
                  upload_filename)

        # We now need to create the temporary upload object and store the
        # file and metadata.
        tu = TemporaryUpload(upload_id=upload_id, file_id=file_id,
                             file=file_obj, upload_name=upload_filename,
                             upload_type=TemporaryUpload.FILE_DATA,
                             uploaded_by=_get_user(request))
        tu.save()

        return Response(upload_id, status=status.HTTP_200_OK,
                        content_type='text/plain')


# Handles chunked file uploads as per the approach described in filepond's
# docs at https://pqina.nl/filepond/docs/patterns/api/server/#process-chunks
class FilepondChunkedFileUploader(FilepondFileUploader):
    """Handles uploads that arrive as a sequence of chunks."""

    def handle_upload(self, request, upload_id, file_id=None):
        """Dispatch a chunked upload request based on the HTTP method.

        PATCH stores a chunk, HEAD reports the offset from which to resume
        and POST registers a new chunked upload (``file_id`` is required and
        validated for POST only).

        Raises:
            MethodNotAllowed: the request method is not PATCH, HEAD or POST.
        """
        # Since the upload_id is being provided here as a parameter, we check
        # it is valid. This should be done by the DB but for some DBs, e.g.
        # SQLite field length validation isn't handled. The same check is
        # done for file_id in the case of POST requests.
        if not self._upload_id_valid(upload_id):
            return Response('Invalid ID for handling upload.',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        if request.method == 'PATCH':
            return self._handle_chunk_upload(request, upload_id)

        if request.method == 'HEAD':
            return self._handle_chunk_restart(request, upload_id)

        if request.method == 'POST':
            if not self._file_id_valid(file_id):
                return Response(
                    'Invalid ID for handling upload.',
                    status=status.HTTP_500_INTERNAL_SERVER_ERROR)
            return self._handle_new_chunk_upload(request, upload_id, file_id)

        # Fail explicitly rather than implicitly returning None for a
        # method this uploader does not support.
        raise MethodNotAllowed(request.method)

    def _handle_new_chunk_upload(self, request, upload_id, file_id):
        """Register a new chunked upload and create its chunk directory.

        Returns a plain-text 200 Response containing ``upload_id`` on
        success, a 400 Response for invalid request data or a 500 Response
        when the chunk storage directory cannot be prepared.
        """
        LOG.debug('Processing a new chunked upload request...')
        file_obj = self._get_file_obj(request)
        if file_obj != '{}':
            return Response('An invalid file object has been received '
                            'for a new chunked upload request.',
                            status=status.HTTP_400_BAD_REQUEST)

        ulen = request.META.get('HTTP_UPLOAD_LENGTH', None)
        if not ulen:
            return Response('No length for new chunked upload request.',
                            status=status.HTTP_400_BAD_REQUEST)

        # Validate the length before touching the filesystem so that a
        # malformed header can't leave an orphaned chunk directory behind.
        total_size = self._parse_non_negative_int(ulen)
        if total_size is None:
            return Response('Invalid length for new chunked upload request.',
                            status=status.HTTP_400_BAD_REQUEST)

        LOG.debug('Handling a new chunked upload request for an upload '
                  'with total length %s bytes', ulen)

        # Do some general checks to make sure that the storage location
        # exists and that we're not being made to try and store something
        # outside the base storage location. Then create the new
        # temporary directory into which chunks will be stored
        base_loc = storage.base_location
        base_abs = os.path.abspath(base_loc)
        chunk_dir = os.path.abspath(os.path.join(base_abs, upload_id))
        # Compare against the base path with a trailing separator so that a
        # sibling directory sharing the same name prefix is not accepted.
        if not chunk_dir.startswith(os.path.join(base_abs, '')):
            return Response('Unable to create storage for upload data.',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        if not os.path.exists(base_loc):
            LOG.debug('The base data store location <%s> doesn\'t exist.'
                      ' Unable to create chunk dir.', base_loc)
            return Response('Data storage error occurred.',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        try:
            # makedirs raises OSError if the directory already exists. The
            # exist_ok keyword is not used since it isn't available in
            # Python 2 and False is the default behaviour in Python 3.
            os.makedirs(chunk_dir)
        except OSError as e:
            LOG.debug('Unable to create chunk storage dir: %s', e)
            return Response(
                'Unable to prepare storage for upload data.',
                status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        # We now create the temporary chunked upload object
        # this will be updated as we receive the chunks.
        tuc = TemporaryUploadChunked(upload_id=upload_id, file_id=file_id,
                                     upload_dir=upload_id,
                                     total_size=total_size,
                                     uploaded_by=_get_user(request))
        tuc.save()

        return Response(upload_id, status=status.HTTP_200_OK,
                        content_type='text/plain')

    def _handle_chunk_upload(self, request, chunk_id):
        """Store one chunk of the chunked upload identified by ``chunk_id``.

        The ``Upload-Offset``, ``Upload-Length`` and ``Upload-Name`` headers
        are checked against the stored TemporaryUploadChunked record. When
        the stored offset reaches the total size, the chunks are assembled
        into the final file via ``_store_upload``.

        Returns a plain-text 200 Response containing ``chunk_id`` on
        success, a 400 Response for invalid request data/metadata or a 500
        Response on a storage error.
        """
        # Check that the incoming data can be accessed. If the request
        # content type was invalid then we want to raise an error here
        # Trying to access request data should result in a 415 response if
        # the data couldn't be handled by the configured parser.
        file_data = request.data

        if not chunk_id:
            return Response('A required chunk parameter is missing.',
                            status=status.HTTP_400_BAD_REQUEST)

        if isinstance(file_data, binary_type):
            fd = BytesIO(file_data)
        elif isinstance(file_data, text_type):
            # NOTE(review): for text data the length used for the offset
            # below is a character count, which may differ from the number
            # of bytes written - confirm whether text data is expected here.
            fd = StringIO(file_data)
        else:
            return Response('Upload data type not recognised.',
                            status=status.HTTP_400_BAD_REQUEST)

        # Try to load a temporary chunked upload object for the provided id
        try:
            tuc = TemporaryUploadChunked.objects.get(upload_id=chunk_id)
        except TemporaryUploadChunked.DoesNotExist:
            return Response('Invalid chunk upload request data',
                            status=status.HTTP_400_BAD_REQUEST)

        # Get the required header information to handle the new data
        uoffset = request.META.get('HTTP_UPLOAD_OFFSET', None)
        ulength = request.META.get('HTTP_UPLOAD_LENGTH', None)
        uname = request.META.get('HTTP_UPLOAD_NAME', None)

        if (not uoffset) or (not ulength) or (uname is None):
            return Response('Chunk upload is missing required metadata',
                            status=status.HTTP_400_BAD_REQUEST)

        # The offset and length are client-supplied so they may not be
        # valid integers; treat this as a bad request, not a server error.
        upload_offset = self._parse_non_negative_int(uoffset)
        upload_length = self._parse_non_negative_int(ulength)
        if (upload_offset is None) or (upload_length is None):
            return Response('Chunk upload metadata is invalid',
                            status=status.HTTP_400_BAD_REQUEST)

        if upload_length != tuc.total_size:
            return Response('ERROR: Upload metadata is invalid - size changed',
                            status=status.HTTP_400_BAD_REQUEST)

        # if this is the first chunk, store the filename
        if tuc.last_chunk == 0:
            tuc.upload_name = uname
        elif tuc.upload_name != uname:
            return Response('Chunk upload file metadata is invalid',
                            status=status.HTTP_400_BAD_REQUEST)

        LOG.debug('Handling chunk <%s> for upload id <%s> with name <%s> '
                  'size <%s> and offset <%s>...',
                  tuc.last_chunk + 1, chunk_id, uname, ulength, uoffset)

        LOG.debug('Current length and offset in the record is: length <%s> '
                  ' offset <%s>', tuc.total_size, tuc.offset)

        # Check that our recorded offset matches the offset provided by the
        # client...if not, there's an error.
        if upload_offset != tuc.offset:
            LOG.error('Offset provided by client <%s> doesn\'t match the '
                      'stored offset <%s> for chunked upload id <%s>',
                      uoffset, tuc.offset, chunk_id)
            return Response('ERROR: Chunked upload metadata is invalid.',
                            status=status.HTTP_400_BAD_REQUEST)

        file_data_len = len(file_data)
        LOG.debug('Got data from request with length %s bytes',
                  file_data_len)

        # Store the chunk and check if we've now completed the upload
        upload_dir = os.path.join(storage.base_location, tuc.upload_dir)
        upload_file = os.path.join(tuc.upload_dir,
                                   '%s_%s' % (tuc.file_id, tuc.last_chunk + 1))
        if not os.path.exists(upload_dir):
            return Response('Chunk storage location error',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
        storage.save(upload_file, fd)

        # Set the updated chunk number and the new offset
        tuc.last_chunk = tuc.last_chunk + 1
        tuc.offset = tuc.offset + file_data_len

        if tuc.offset == tuc.total_size:
            tuc.upload_complete = True

        tuc.save()

        # At this point, if the upload is complete, we can rebuild the chunks
        # into the complete file and store it with a TemporaryUpload object.
        if tuc.upload_complete:
            try:
                self._store_upload(tuc)
            except (ValueError, FileNotFoundError) as e:
                LOG.error('Error storing upload: %s', e)
                return Response('Error storing uploaded file.',
                                status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        return Response(chunk_id, status=status.HTTP_200_OK,
                        content_type='text/plain')

    def _store_upload(self, tuc):
        """Assemble the stored chunks into one file and save it.

        The chunks are concatenated in order, saved through a new
        TemporaryUpload object and, once the stored file has been verified,
        the chunk files and the ``tuc`` record are deleted.

        Raises:
            ValueError: the upload is not complete, or the stored file is
                missing or not of the expected size.
            FileNotFoundError: one of the chunk files is missing.
        """
        if not tuc.upload_complete:
            LOG.error('Attempt to store an incomplete upload with ID <%s>',
                      tuc.upload_id)
            raise ValueError('Attempt to store an incomplete upload with ID '
                             '<%s>' % (tuc.upload_id))

        # Load each of the file parts into a BytesIO object and store them
        # via a TemporaryUpload object.
        chunk_dir = os.path.join(storage.base_location, tuc.upload_dir)
        chunk_files = [
            os.path.join(chunk_dir, '%s_%s' % (tuc.file_id, i))
            for i in range(1, tuc.last_chunk + 1)
        ]

        file_data = BytesIO()
        for chunk_num, chunk_file in enumerate(chunk_files, 1):
            if not os.path.exists(chunk_file):
                raise FileNotFoundError('Chunk file not found for chunk <%s>'
                                        % (chunk_num))

            with open(chunk_file, 'rb') as cf:
                file_data.write(cf.read())

        # Prepare an InMemoryUploadedFile object so that the data can be
        # successfully saved via the FileField in the TemporaryUpload object
        memfile = InMemoryUploadedFile(file_data, None, tuc.file_id,
                                       'application/octet-stream',
                                       tuc.total_size, None)
        tu = TemporaryUpload(upload_id=tuc.upload_id, file_id=tuc.file_id,
                             file=memfile, upload_name=tuc.upload_name,
                             upload_type=TemporaryUpload.FILE_DATA,
                             uploaded_by=tuc.uploaded_by)
        tu.save()

        # Check that the final file is stored and of the correct size
        stored_file_path = os.path.join(chunk_dir, tuc.file_id)
        if ((not os.path.exists(stored_file_path)) or
                (not os.path.getsize(stored_file_path) == tuc.total_size)):
            raise ValueError('Stored file size wrong or file not found.')

        LOG.debug('Full file built from chunks and saved. Deleting chunks '
                  'and TemporaryUploadChunked object.')

        for chunk_file in chunk_files:
            os.remove(chunk_file)
        tuc.delete()

    def _handle_chunk_restart(self, request, upload_id):
        """Report the offset from which a chunked upload should resume.

        Returns a plain-text 200 Response containing ``upload_id`` with an
        ``Upload-Offset`` header on success, 404 if no chunked upload exists
        for ``upload_id``, 400 if the upload is already complete or 500 if
        the chunk directory is missing.
        """
        try:
            tuc = TemporaryUploadChunked.objects.get(upload_id=upload_id)
        except TemporaryUploadChunked.DoesNotExist:
            return Response('Invalid upload ID specified.',
                            status=status.HTTP_404_NOT_FOUND,
                            content_type='text/plain')

        if tuc.upload_complete is True:
            return Response('Invalid upload ID specified.',
                            status=status.HTTP_400_BAD_REQUEST,
                            content_type='text/plain')

        # Check that the directory for the chunks exists
        if not os.path.exists(os.path.join(storage.base_location,
                                           tuc.upload_dir)):
            return Response('Invalid upload location, can\'t continue upload.',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                            content_type='text/plain')

        # TODO: Is it necessary to check for the existence of all previous
        # chunk files here?

        LOG.debug('Returning offset to continue chunked upload. We have <%s> '
                  'chunks so far and are at offest <%s>.',
                  tuc.last_chunk, tuc.offset)
        return Response(upload_id, status=status.HTTP_200_OK,
                        headers={'Upload-Offset': str(tuc.offset)},
                        content_type='text/plain')