# -*- coding: utf-8 -*-
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to write output to various location types."""

from __future__ import unicode_literals

import errno
import json
import logging
import os
import re
import shutil
import time

from turbinia import config
from turbinia import TurbiniaException

config.LoadConfig()
if config.GCS_OUTPUT_PATH and config.GCS_OUTPUT_PATH.lower() is not 'none':
  from google.cloud import storage
  from google.cloud import exceptions

log = logging.getLogger('turbinia')


class OutputManager(object):
  """Manages output data.

  Manages the configured output writers.  Also saves and retrieves evidence data
  as well as other files that are created when running tasks.

  Attributes:
    _output_writers (list): The configured output writers
    is_setup (bool): Whether this object has been setup or not.
  """

  def __init__(self):
    self._output_writers = None
    self.is_setup = False

  @staticmethod
  def get_output_writers(task):
    """Get a list of output writers.

    Args:
      task: A TurbiniaTask object

    Returns:
      A list of OutputWriter objects.
    """
    epoch = str(int(time.time()))
    unique_dir = '{0:s}-{1:s}-{2:s}'.format(epoch, str(task.id), task.name)

    writers = [
        LocalOutputWriter(
            base_output_dir=task.base_output_dir, unique_dir=unique_dir)
    ]
    local_output_dir = writers[0].local_output_dir
    config.LoadConfig()
    if config.GCS_OUTPUT_PATH:
      writer = GCSOutputWriter(
          unique_dir=unique_dir, gcs_path=config.GCS_OUTPUT_PATH,
          local_output_dir=local_output_dir)
      writers.append(writer)
    return writers

  def get_local_output_dirs(self):
    """Gets the local output directories from the local output writer.

    Returns:
      Tuple(string): (Path to temp directory, path to local output directory)

    Raises:
      TurbiniaException: If no local output writer with output_dir is found.
    """
    if not self._output_writers:
      raise TurbiniaException('No output writers found.')

    # Get the local writer
    writer = [w for w in self._output_writers if w.name == 'LocalWriter'][0]
    if not hasattr(writer, 'local_output_dir'):
      raise TurbiniaException(
          'Local output writer does not have local_output_dir attribute.')

    if not writer.local_output_dir:
      raise TurbiniaException(
          'Local output writer attribute local_output_dir is not set')

    if not hasattr(writer, 'tmp_dir'):
      raise TurbiniaException(
          'Local output writer does not have tmp_dir attribute.')

    if not writer.tmp_dir:
      raise TurbiniaException(
          'Local output writer attribute tmp_dir is not set')

    return (writer.tmp_dir, writer.local_output_dir)

  def retrieve_evidence(self, evidence_):
    """Retrieves evidence data from remote location.

    Args:
      evidence_: Evidence object

    Returns:
      An evidence object
    """
    for writer in self._output_writers:
      if writer.name == evidence_.saved_path_type:
        log.info(
            'Retrieving copyable evidence data from {0:s}'.format(
                evidence_.saved_path))
        evidence_.local_path = writer.copy_from(evidence_.saved_path)
    return evidence_

  def save_evidence(self, evidence_, result):
    """Saves local evidence data to remote location.

    Args:
      evidence_ (Evidence): Evidence to save data from
      result (TurbiniaTaskResult): Result object to save path data to

    Returns:
      An evidence object

    Raises:
      TurbiniaException: If serialization or writing of evidence config fails
    """
    path, path_type, local_path = self.save_local_file(
        evidence_.local_path, result)

    if evidence_.save_metadata:
      metadata = evidence_.config.copy()
      metadata['evidence_path'] = path
      metadata_path = '{0:s}.metadata.json'.format(local_path)
      try:
        json_str = json.dumps(metadata)
      except TypeError as exception:
        raise TurbiniaException(
            'Could not serialize Evidence config for {0:s}: {1:s}'.format(
                evidence_.name, exception))

      try:
        log.debug('Writing metadata file to {0:s}'.format(metadata_path))
        with open(metadata_path, 'wb') as file_handle:
          file_handle.write(json_str.encode('utf-8'))
      except IOError as exception:
        raise TurbiniaException(
            'Could not write metadata file {0:s}: {1:s}'.format(
                metadata_path, exception))

      self.save_local_file(metadata_path, result)

    # Set the evidence local_path from the saved path info so that in cases
    # where tasks are saving evidence into the temp dir, we'll get the newly
    # copied version from the saved output path.
    if local_path:
      evidence_.local_path = local_path
    evidence_.saved_path = path
    evidence_.saved_path_type = path_type
    if evidence_.saved_path:
      log.info(
          'Saved copyable evidence data to {0:s}'.format(evidence_.saved_path))
    return evidence_

  def save_local_file(self, file_, result):
    """Saves local file by writing to all output writers.

    Most local files will already be in the local output directory and won't
    need to be copied by the LocalOutputWriter, but any files outside of this
    directory (e.g. files in the tmp_dir) will still be copied locally.

    Args:
      file_ (string): Path to file to save.
      result (TurbiniaTaskResult): Result object to save path data to

    Returns:
      Tuple of (String of last written file path,
                String of last written file destination output type,
                Local path if saved locally, else None)
    """
    saved_path = None
    saved_path_type = None
    local_path = None
    for writer in self._output_writers:
      new_path = writer.copy_to(file_)
      if result:
        if new_path:
          result.saved_paths.append(new_path)
          saved_path = new_path
          saved_path_type = writer.name
        elif os.path.exists(file_) and os.path.getsize(file_) > 0:
          # We want to save the old path if the path is still valid.
          result.saved_paths.append(file_)

      if writer.name == LocalOutputWriter.NAME:
        local_path = new_path

    return saved_path, saved_path_type, local_path

  def setup(self, task):
    """Setup OutputManager object."""
    self._output_writers = self.get_output_writers(task)
    self.is_setup = True


class OutputWriter(object):
  """Base class.

  By default this will write the files the Evidence objects point to along with
  any other files explicitly written with copy_to().

  Attributes:
    base_output_dir (string): The base path for output.  The value is specific
        to the output writer object type.
    local_output_dir: The full path for the local output dir.
    name (string): Name of this output writer
    unique_dir (string): A psuedo-unique string to be used in paths.
  """

  NAME = 'base_output_writer'

  def __init__(
      self, base_output_dir=None, unique_dir=None, local_output_dir=None):
    """Initialization for OutputWriter.

    Args:
      base_output_dir (string): The base path for output.  Set to the configured
          OUTPUT_DIR by default.
      local_output_dir: The full path for the local output dir.  This will be
          generated automatically if not set.
      unique_dir (string):  A psuedo-unique string to be used in paths. This
          will be generated automatically if not set.
    """
    self.unique_dir = unique_dir
    self.name = self.NAME

    if base_output_dir:
      self.base_output_dir = base_output_dir
    else:
      config.LoadConfig()
      self.base_output_dir = config.OUTPUT_DIR

    if local_output_dir:
      self.local_output_dir = local_output_dir
    else:
      self.local_output_dir = self.create_output_dir()

  def create_output_dir(self, base_path=None):
    """Creates a unique output path for this task and creates directories.

    Needs to be run at runtime so that the task creates the directory locally.

    Args:
      base_path(string): Base directory output directory will be created in.

    Returns:
      A local output path string.

    Raises:
      TurbiniaException: If there are failures creating the directory.
    """
    raise NotImplementedError

  def copy_to(self, source_file):
    """Copies file to the managed location.

    Files will be copied into base_output_dir with a filename set to the
    basename of the source file.

    Args:
      source_file (string): A path to a local source file.

    Returns:
      The path the file was saved to, or None if file was not written.

    Raises:
      TurbiniaException: When the source file is empty or there are problems
          saving the file.
    """
    raise NotImplementedError

  def copy_from(self, source_file):
    """Copies output file from the managed location to the local output dir.

    Args:
      source_file (string): A path to a source file in the managed storage
          location.  This path should be in a format matching the storage type
          (e.g. GCS paths are formatted like 'gs://bucketfoo/' and local paths
          are like '/foo/bar'.

    Returns:
      The path the file was saved to, or None if file was not written.

    Raises:
      TurbiniaException: When there are problems copying from storage.
    """
    raise NotImplementedError


class LocalOutputWriter(OutputWriter):
  """Class for writing to local filesystem output.

  Attributes:
    tmp_dir (string): Path to temp directory
  """

  NAME = 'LocalWriter'

  # pylint: disable=keyword-arg-before-vararg
  def __init__(self, base_output_dir=None, *args, **kwargs):
    super(LocalOutputWriter, self).__init__(
        base_output_dir=base_output_dir, *args, **kwargs)
    config.LoadConfig()
    self.tmp_dir = self.create_output_dir(base_path=config.TMP_DIR)

  def create_output_dir(self, base_path=None):
    base_path = base_path if base_path else self.base_output_dir
    output_dir = os.path.join(base_path, self.unique_dir)
    if not os.path.exists(output_dir):
      try:
        log.debug('Creating new directory {0:s}'.format(output_dir))
        os.makedirs(output_dir)
      except OSError as exception:
        if exception.errno == errno.EACCES:
          message = 'Permission error ({0:s})'.format(str(exception))
        else:
          message = str(exception)
        raise TurbiniaException(message)

    return output_dir

  def _copy(self, file_path):
    """Copies file to local output dir.

    Args:
      file_path(string): Source path to the file to copy.

    Returns:
      The path the file was saved to, or None if file was not written.
    """
    destination_file = os.path.join(
        self.local_output_dir, os.path.basename(file_path))

    if self.local_output_dir in os.path.commonprefix([file_path,
                                                      destination_file]):
      log.debug(
          'Not copying source file {0:s} already in output dir {1:s}'.format(
              file_path, self.local_output_dir))
      return None
    if not os.path.exists(file_path):
      log.warning('Source file [{0:s}] does not exist.'.format(file_path))
      return None
    if os.path.exists(destination_file):
      log.warning(
          'Target output file path [{0:s}] already exists.'.format(
              destination_file))
      return None

    shutil.copy(file_path, destination_file)
    log.debug('Copied file {0:s} to {1:s}'.format(file_path, destination_file))
    return destination_file

  def copy_to(self, source_file):
    return self._copy(source_file)

  def copy_from(self, source_file):
    return self._copy(source_file)


class GCSOutputWriter(OutputWriter):
  """Output writer for Google Cloud Storage.

  attributes:
    bucket (string): Storage bucket to put output results into.
    client (google.cloud.storage.Client): GCS Client
  """

  CHUNK_SIZE = 10 * (2**20)  # 10MB by default

  NAME = 'GCSWriter'

  def __init__(self, gcs_path, *args, **kwargs):
    """Initialization for GCSOutputWriter.

    Args:
      gcs_path (string): GCS path to put output results into.
    """
    super(GCSOutputWriter, self).__init__(*args, **kwargs)
    config.LoadConfig()
    self.client = storage.Client(project=config.TURBINIA_PROJECT)

    self.bucket, self.base_output_dir = self._parse_gcs_path(gcs_path)

  @staticmethod
  def _parse_gcs_path(file_):
    """Get the bucket and path values from a GCS path.

    Args:
      file_ (string): GCS file path.

    Returns:
      A tuple of ((string) bucket, (string) path)
    """
    match = re.search(r'gs://(.*?)/(.*$)', file_)
    if not match:
      raise TurbiniaException(
          'Cannot find bucket and path from GCS config {0:s}'.format(file_))
    return match.group(1), match.group(2)

  def create_output_dir(self, base_path=None):
    # Directories in GCS are artificial, so any path can be written as part of
    # the object name.
    pass

  def copy_to(self, source_path):
    if os.path.getsize(source_path) == 0:
      message = (
          'Local source file {0:s} is empty.  Not uploading to GCS'.format(
              source_path))
      log.error(message)
      raise TurbiniaException(message)

    bucket = self.client.get_bucket(self.bucket)
    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, os.path.basename(source_path))
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(source_path, destination_path))
    try:
      blob = storage.Blob(destination_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
      message = 'File upload to GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)
    return os.path.join('gs://', self.bucket, destination_path)

  def copy_from(self, source_path):
    """Copies output file from the managed location to the local output dir.

    Args:
      source_file (string): A path to a source file in the managed storage
          location.  This path should be in a format matching the storage type
          (e.g. GCS paths are formatted like 'gs://bucketfoo/' and local paths
          are like '/foo/bar'.

    Returns:
      The path the file was saved to, or None if file was not written.

    Raises:
      TurbiniaException: If file retrieval fails.
    """
    bucket = self.client.get_bucket(self.bucket)
    gcs_path = self._parse_gcs_path(source_path)[1]
    destination_path = os.path.join(
        self.local_output_dir, os.path.basename(source_path))
    log.info(
        'Writing GCS file {0:s} to local path {1:s}'.format(
            source_path, destination_path))
    try:
      blob = storage.Blob(gcs_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.download_to_filename(destination_path, client=self.client)
    except exceptions.RequestRangeNotSatisfiable as exception:
      message = (
          'File retrieval from GCS failed, file may be empty: {0!s}'.format(
              exception))
      log.error(message)
      raise TurbiniaException(message)
    except exceptions.GoogleCloudError as exception:
      message = 'File retrieval from GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)

    if not os.path.exists(destination_path):
      message = (
          'File retrieval from GCS failed: Local file {0:s} does not '
          'exist'.format(destination_path))
      log.error(message)
      raise TurbiniaException(message)
    return destination_path