// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator;

import com.google.archivepatcher.shared.RandomAccessFileInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipException;

/**
 * A minimal set of zip-parsing utilities just adequate to produce a {@link MinimalZipEntry} and
 * update it. This parser is neither robust nor exhaustive. The parser is built to understand
 * version 2.0 of the ZIP specification, with the notable exception that it does not have support
 * for encrypted central directories.
 * <p>
 * The offsets, lengths and fields that this parser understands and exposes are based on version
 * 6.3.3 of the ZIP specification (the most recent available at the time of this writing), which may
 * be found at the following URL:
 * <br><ul><li>https://www.pkware.com/documents/APPNOTE/APPNOTE-6.3.3.TXT</li></ul>
 * <p>
 * Please note that the parser does not attempt to verify the version-needed-to-extract field, since
 * there is no guarantee that all ZIP implementations have set the value correctly to the minimum
 * needed to truly support extraction.
 */
class MinimalZipParser {

  /**
   * Standard 32-bit signature for a "end-of-central-directory" record in a ZIP-like archive. This
   * is the value obtained when the four signature bytes are read in little-endian order.
   */
  public static final int EOCD_SIGNATURE = 0x06054b50;

  /**
   * Standard 32-bit signature for a "central directory entry" record in a ZIP-like archive. This is
   * the value obtained when the four signature bytes are read in little-endian order.
   */
  public static final int CENTRAL_DIRECTORY_ENTRY_SIGNATURE = 0x02014b50;

  /**
   * Standard 32-bit signature for a "local file entry" in a ZIP-like archive. This is the value
   * obtained when the four signature bytes are read in little-endian order.
   */
  public static final int LOCAL_ENTRY_SIGNATURE = 0x04034b50;

  /**
   * Mask for bit 11 of the general-purpose flags (bits are numbered from zero in the ZIP
   * specification). When set, the file name and comment of the entry are encoded as UTF-8.
   */
  private static final int GENERAL_PURPOSE_FLAG_BIT_11 = 0x1 << 11;

  /**
   * Read exactly one byte, throwing an exception if unsuccessful.
   * @param in the stream to read from
   * @return the byte read, as a value in the range [0, 255]
   * @throws IOException if EOF is reached
   */
  private static int readByteOrDie(InputStream in) throws IOException {
    int result = in.read();
    if (result == -1) {
      throw new IOException("EOF");
    }
    return result;
  }

  /**
   * Skips exactly the specified number of bytes, throwing an exception if unsuccessful.
   * @param in the stream to read from
   * @param numBytes the number of bytes to skip; zero is a no-op, negative values are rejected
   * @throws IOException if EOF is reached before all bytes were skipped, or if numBytes is negative
   */
  private static void skipOrDie(InputStream in, long numBytes) throws IOException {
    long numLeft = numBytes;
    while (numLeft > 0) {
      long numSkipped = in.skip(numLeft);
      if (numSkipped > 0) {
        numLeft -= numSkipped;
      } else if (in.read() == -1) {
        // skip() is allowed to return 0 before EOF, so a single-byte read is used to tell a
        // temporary stall apart from a genuine end of stream.
        throw new IOException("Unable to skip");
      } else {
        numLeft--;
      }
    }
    if (numLeft != 0) {
      // Reached for a negative request, or if the stream claims to have skipped too much.
      throw new IOException("Unable to skip");
    }
  }

  /**
   * Reads 2 bytes from the current offset as an unsigned, 16-bit little-endian value.
   * @param in the stream to read from
   * @return the value as a java int, in the range [0, 65535]
   * @throws IOException if unable to read
   */
  private static int read16BitUnsigned(InputStream in) throws IOException {
    int value = readByteOrDie(in);
    value |= readByteOrDie(in) << 8;
    return value;
  }

  /**
   * Reads 4 bytes from the current offset as an unsigned, 32-bit little-endian value.
   * @param in the stream to read from
   * @return the value as a java long, so that the full unsigned range can be represented
   * @throws IOException if unable to read
   */
  private static long read32BitUnsigned(InputStream in) throws IOException {
    long value = readByteOrDie(in);
    value |= ((long) readByteOrDie(in)) << 8;
    value |= ((long) readByteOrDie(in)) << 16;
    value |= ((long) readByteOrDie(in)) << 24;
    return value;
  }

  /**
   * Read exactly the specified amount of data into the specified buffer, throwing an exception if
   * unsuccessful.
   * @param in the stream to read from
   * @param buffer the buffer to fill
   * @param offset the offset at which to start writing to the buffer
   * @param length the number of bytes to place into the buffer from the input stream
   * @throws IOException if EOF is reached before length bytes have been read
   * @throws IllegalArgumentException if length is negative
   */
  private static void readOrDie(InputStream in, byte[] buffer, int offset, int length)
      throws IOException {
    if (length < 0) {
      throw new IllegalArgumentException("length must be >= 0");
    }
    int numRead = 0;
    while (numRead < length) {
      int readThisRound = in.read(buffer, offset + numRead, length - numRead);
      // The result of this particular read call signals EOF, not the running total.
      if (readThisRound == -1) {
        throw new IOException("EOF");
      }
      numRead += readThisRound;
    }
  }

  /**
   * Parse one central directory entry, starting at the current file position. On return the stream
   * has been advanced past the whole entry, including its file name, extras and comment.
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return the entry that was parsed
   * @throws IOException if unable to complete the parsing
   */
  public static MinimalZipEntry parseCentralDirectoryEntry(InputStream in) throws IOException {
    // *** 4 bytes encode the CENTRAL_DIRECTORY_ENTRY_SIGNATURE, verify for sanity
    // 2 bytes encode the version-made-by, ignore
    // 2 bytes encode the version-needed-to-extract, ignore
    // *** 2 bytes encode the general-purpose flags, read for language encoding. [READ THIS]
    // *** 2 bytes encode the compression method, [READ THIS]
    // 2 bytes encode the MSDOS last modified file time, ignore
    // 2 bytes encode the MSDOS last modified file date, ignore
    // *** 4 bytes encode the CRC32 of the uncompressed data [READ THIS]
    // *** 4 bytes encode the compressed size [READ THIS]
    // *** 4 bytes encode the uncompressed size [READ THIS]
    // *** 2 bytes encode the length of the file name [READ THIS]
    // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
    // *** 2 bytes encode the length of the comment, needed to skip the bytes later [READ THIS]
    // 2 bytes encode the disk number, ignore
    // 2 bytes encode the internal file attributes, ignore
    // 4 bytes encode the external file attributes, ignore
    // *** 4 bytes encode the offset of the local section entry, where the data is [READ THIS]
    // n bytes encode the file name
    // n bytes encode the extras
    // n bytes encode the comment
    if (((int) read32BitUnsigned(in)) != CENTRAL_DIRECTORY_ENTRY_SIGNATURE) {
      throw new ZipException("Bad central directory header");
    }
    skipOrDie(in, 2 + 2); // Skip version stuff
    int generalPurposeFlags = read16BitUnsigned(in);
    int compressionMethod = read16BitUnsigned(in);
    skipOrDie(in, 2 + 2); // Skip MSDOS junk
    long crc32OfUncompressedData = read32BitUnsigned(in);
    long compressedSize = read32BitUnsigned(in);
    long uncompressedSize = read32BitUnsigned(in);
    int fileNameLength = read16BitUnsigned(in);
    int extrasLength = read16BitUnsigned(in);
    int commentLength = read16BitUnsigned(in);
    skipOrDie(in, 2 + 2 + 4); // Skip the disk number and file attributes
    long fileOffsetOfLocalEntry = read32BitUnsigned(in);
    byte[] fileNameBuffer = new byte[fileNameLength];
    readOrDie(in, fileNameBuffer, 0, fileNameBuffer.length);
    skipOrDie(in, extrasLength + commentLength);
    // General purpose flag bit 11 is an important hint for the character set used for file names.
    // The specification numbers the flag bits from zero, so bit 11 corresponds to 0x0800.
    boolean generalPurposeFlagBit11 = (generalPurposeFlags & GENERAL_PURPOSE_FLAG_BIT_11) != 0;
    return new MinimalZipEntry(
        compressionMethod,
        crc32OfUncompressedData,
        compressedSize,
        uncompressedSize,
        fileNameBuffer,
        generalPurposeFlagBit11,
        fileOffsetOfLocalEntry);
  }

  /**
   * Parses one local file entry and returns the offset from the first byte at which the compressed
   * data begins. Only the fixed-size part of the header is consumed from the stream; the file name
   * and extras are accounted for in the returned offset but are not read.
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return as described
   * @throws IOException if unable to complete the parsing
   */
  public static long parseLocalEntryAndGetCompressedDataOffset(InputStream in) throws IOException {
    // *** 4 bytes encode the LOCAL_ENTRY_SIGNATURE, verify for sanity
    // 2 bytes encode the version-needed-to-extract, ignore
    // 2 bytes encode the general-purpose flags, ignore
    // 2 bytes encode the compression method, ignore (redundant with central directory)
    // 2 bytes encode the MSDOS last modified file time, ignore
    // 2 bytes encode the MSDOS last modified file date, ignore
    // 4 bytes encode the CRC32 of the uncompressed data, ignore (redundant with central directory)
    // 4 bytes encode the compressed size, ignore (redundant with central directory)
    // 4 bytes encode the uncompressed size, ignore (redundant with central directory)
    // *** 2 bytes encode the length of the file name, needed to skip the bytes later [READ THIS]
    // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
    // The rest is the data, which is the main attraction here.
    if (((int) read32BitUnsigned(in)) != LOCAL_ENTRY_SIGNATURE) {
      throw new ZipException("Bad local entry header");
    }
    int junkLength = 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4;
    skipOrDie(in, junkLength); // Skip everything up to the length of the file name
    final int fileNameLength = read16BitUnsigned(in);
    final int extrasLength = read16BitUnsigned(in);

    // The file name is already known and will match the central directory, so no need to read it.
    // The extra field length can be different here versus in the central directory and is used for
    // things like zipaligning APKs. This single value is the critical part as it dictates where the
    // actual DATA for the entry begins.
    // Signature (4) + skipped fields + the two length fields (2 + 2) + the variable-length parts.
    return 4 + junkLength + 2 + 2 + fileNameLength + extrasLength;
  }

  /**
   * Find the end-of-central-directory record by scanning backwards from the end of a file looking
   * for the signature of the record. Note that this calls {@code setRange} on the supplied stream
   * in order to read the tail of the file.
   * @param in the file to read from
   * @param searchBufferLength the length of the search buffer, starting from the end of the file
   * @return the offset in the file at which the first byte of the EOCD signature is located, or -1
   * if the signature is not found in the search buffer
   * @throws IOException if there is a problem reading
   */
  public static long locateStartOfEocd(RandomAccessFileInputStream in, int searchBufferLength)
      throws IOException {
    // Never try to buffer more than the file actually contains.
    final int maxBufferSize = (int) Math.min(searchBufferLength, in.length());
    final byte[] buffer = new byte[maxBufferSize];
    final long rangeStart = in.length() - buffer.length;
    in.setRange(rangeStart, buffer.length);
    readOrDie(in, buffer, 0, buffer.length);
    int offset = locateStartOfEocd(buffer);
    if (offset == -1) {
      return -1;
    }
    // Translate the buffer-relative offset back into a file offset.
    return rangeStart + offset;
  }

  /**
   * Find the end-of-central-directory record by scanning backwards looking for the signature of the
   * record. Because the scan runs from the end of the buffer, the match closest to the end wins.
   * @param buffer the buffer in which to search
   * @return the offset in the buffer at which the first byte of the EOCD signature is located, or
   * -1 if the complete signature is not found
   */
  public static int locateStartOfEocd(byte[] buffer) {
    int last4Bytes = 0; // This is the 32 bits of data from the file
    for (int offset = buffer.length - 1; offset >= 0; offset--) {
      last4Bytes <<= 8;
      // Mask to avoid sign extension of bytes >= 0x80 leaking into the upper 24 bits.
      last4Bytes |= buffer[offset] & 0xff;
      if (last4Bytes == EOCD_SIGNATURE) {
        return offset;
      }
    }
    return -1;
  }

  /**
   * Parse the end-of-central-directory record and return the critical information from it.
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return the metadata
   * @throws IOException if unable to read
   * @throws ZipException if the signature is wrong, or if the metadata indicates this is a zip64
   * archive, which is not supported
   */
  public static MinimalCentralDirectoryMetadata parseEocd(InputStream in)
      throws IOException, ZipException {
    // *** 4 bytes encode EOCD_SIGNATURE, verify for sanity.
    // 2 bytes encode disk number for this archive, ignore.
    // 2 bytes encode disk number for the central directory, ignore.
    // 2 bytes encode num entries in the central directory on this disk, ignore.
    // *** 2 bytes encode num entries in the central directory overall [READ THIS]
    // *** 4 bytes encode the length of the central directory [READ THIS]
    // *** 4 bytes encode the file offset of the central directory [READ THIS]
    // 2 bytes encode the length of the zip file comment, ignore.
    // Everything else from here to the EOF is the zip file comment, or junk. Ignore.
    if (((int) read32BitUnsigned(in)) != EOCD_SIGNATURE) {
      throw new ZipException("Bad eocd header");
    }
    skipOrDie(in, 2 + 2 + 2);
    int numEntriesInCentralDirectory = read16BitUnsigned(in);
    if (numEntriesInCentralDirectory == 0xffff) {
      // If 0xffff, this is a zip64 archive and this code doesn't handle that.
      throw new ZipException("No support for zip64");
    }
    long lengthOfCentralDirectory = read32BitUnsigned(in);
    long offsetOfCentralDirectory = read32BitUnsigned(in);
    return new MinimalCentralDirectoryMetadata(
        numEntriesInCentralDirectory, offsetOfCentralDirectory, lengthOfCentralDirectory);
  }
}