org.apache.commons.compress.archivers.tar.TarArchiveInputStream Scala Examples

The following examples show how to use org.apache.commons.compress.archivers.tar.TarArchiveInputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: ImageLoaderUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.loaders

import java.awt.image.BufferedImage
import java.io.{InputStream, ByteArrayInputStream}
import java.net.URI
import java.util.zip.GZIPInputStream
import javax.imageio.ImageIO

import keystoneml.loaders.VOCLoader._
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.Logging
import keystoneml.utils._

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object ImageLoaderUtils extends Logging {

  /**
   * Loads labeled images from a set of tar archives referenced by URI.
   *
   * @param filePathsRDD RDD of URIs, each pointing at a tar archive of image files.
   * @param labelsMap maps a tar entry name to its label.
   * @param imageBuilder builds the labeled image from the decoded image, its label,
   *                     and the archive entry name.
   * @param namePrefix if set, only entries whose name starts with this prefix are loaded.
   * @return an RDD of labeled images, one per decodable matching entry.
   */
  def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag](
      filePathsRDD: RDD[URI],
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits.
      namePrefix: Option[String] = None): RDD[I] = {
    filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix))
  }

  /**
   * Reads one tar archive from a Hadoop-visible filesystem and decodes every
   * matching entry into a labeled image. Runs on the executors.
   */
  private def loadFile[L, I <: AbstractLabeledImage[L]](
      fileUri: URI,
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I,
      namePrefix: Option[String]): Iterator[I] = {
    val filePath = new Path(fileUri)
    val conf = new Configuration(true)
    val fs = FileSystem.get(filePath.toUri(), conf)
    val fStream = fs.open(filePath)

    val tarStream = new ArchiveStreamFactory().createArchiveInputStream(
      "tar", fStream).asInstanceOf[TarArchiveInputStream]

    val imgs = new ArrayBuffer[I]
    try {
      var entry = tarStream.getNextTarEntry()
      while (entry != null) {
        if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) {
          // A single read() is not guaranteed to fill the buffer, so loop until
          // the whole entry has been consumed or the stream ends.
          var offset = 0
          var ret = 0
          val content = new Array[Byte](entry.getSize().toInt)
          while (ret >= 0 && offset != entry.getSize()) {
            ret = tarStream.read(content, offset, content.length - offset)
            if (ret >= 0) {
              offset += ret
            }
          }

          val bais = new ByteArrayInputStream(content)

          // loadImage returns an Option; undecodable payloads are silently skipped.
          val image = ImageUtils.loadImage(bais).map { img =>
            imageBuilder(img, labelsMap(entry.getName), Some(entry.getName))
          }

          imgs ++= image
        }
        entry = tarStream.getNextTarEntry()
      }
    } finally {
      // Fix: the original leaked both streams. Closing the archive stream also
      // closes the wrapped filesystem input stream.
      tarStream.close()
    }

    imgs.iterator
  }
}
Example 2
Source File: TarFlowSpec.scala    From nexus   with Apache License 2.0 5 votes vote down vote up
package ch.epfl.bluebrain.nexus.storage

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Path, Paths}

import akka.actor.ActorSystem
import akka.stream.alpakka.file.scaladsl.Directory
import akka.stream.scaladsl.{FileIO, Source}
import akka.testkit.TestKit
import akka.util.ByteString
import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness}
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.io.FileUtils
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues}

import scala.annotation.tailrec

class TarFlowSpec
    extends TestKit(ActorSystem("TarFlowSpec"))
    with AnyWordSpecLike
    with Matchers
    with IOEitherValues
    with Randomness
    with EitherValues
    with OptionValues
    with Inspectors
    with BeforeAndAfterAll {

  // Temp workspace: two sibling directories under a fresh base path.
  val basePath = Files.createTempDirectory("tarflow")
  val dir1     = basePath.resolve("one")
  val dir2     = basePath.resolve("two")

  override def afterAll(): Unit = {
    super.afterAll()
    FileUtils.cleanDirectory(basePath.toFile)
    ()
  }

  type PathAndContent = (Path, String)

  "A TarFlow" should {

    Files.createDirectories(dir1)
    Files.createDirectories(dir2)

    // Entry names inside the tar are relative to the parent of basePath.
    def relativize(path: Path): String = basePath.getParent().relativize(path).toString

    "generate the byteString for a tar file correctly" in {
      val file1        = dir1.resolve("file1.txt")
      val file1Content = genString()
      val file2        = dir1.resolve("file3.txt")
      val file2Content = genString()
      val file3        = dir2.resolve("file3.txt")
      val file3Content = genString()
      val files        = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content)
      forAll(files) {
        case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue
      }
      val byteString   = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue
      val bytes        = new ByteArrayInputStream(byteString.toArray)
      val tar          = new TarArchiveInputStream(bytes)

      // Drains the archive into (path, utf8 content) pairs, most recent first.
      @tailrec def readEntries(
          tar: TarArchiveInputStream,
          entries: List[PathAndContent] = Nil
      ): List[PathAndContent] = {
        val entry = tar.getNextTarEntry
        if (entry == null) entries
        else {
          val data = Array.ofDim[Byte](entry.getSize.toInt)
          // Fix: a single read() may return fewer bytes than requested; loop
          // until the entry is fully consumed so the content comparison is reliable.
          var offset = 0
          var read   = 0
          while (read != -1 && offset < data.length) {
            read = tar.read(data, offset, data.length - offset)
            if (read > 0) offset += read
          }
          readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries)
        }
      }
      val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "")
      val untarred    = readEntries(tar).map { case (path, content) => path.toString -> content }
      val expected    = files.map { case (path, content) => relativize(path) -> content } ++ directories
      untarred should contain theSameElementsAs expected
    }
  }

}
Example 3
Source File: UtilCommands.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
import java.io.{BufferedInputStream, File}
import java.nio.file.{Files, Paths}
import java.security.MessageDigest
import scala.util.control.Breaks._

import javax.xml.bind.DatatypeConverter
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.IOUtils
object UtilCommands {
  val OSX_NAME = "Mac OS X"

  val linuxSshpass = if (Files.exists(Paths.get("bin/utils/sshpass"))) "bin/utils/sshpass" else "sshpass"
  val osxSshpass = "/usr/local/bin/sshpass"

  val sshpass = if (isOSX) osxSshpass else linuxSshpass

  def isOSX = System.getProperty("os.name") == OSX_NAME

  /** Throws if the component's packaged config file no longer matches the expected MD5. */
  def verifyComponentConfNotChanged(componentName:String, configFilePath:String, expectedHash:String) = {
    val confContent = UtilCommands.unTarGz("./components", componentName, configFilePath)
    UtilCommands.checksum(componentName, configFilePath, confContent, expectedHash)
  }

  /**
   * Compares the MD5 of confContent against expectedHash (case-insensitive hex)
   * and throws with a descriptive message on mismatch.
   */
  def checksum(componentName:String, configFilePath:String, confContent:Array[Byte], expectedHash:String) = {
    val actualHash = MessageDigest.getInstance("MD5").digest(confContent)
    val actualHashStr = DatatypeConverter.printHexBinary(actualHash)
    if (!expectedHash.equalsIgnoreCase(actualHashStr))
      throw new Exception(s"$componentName configuration file $configFilePath has been changed, please change the template accordingly " +
        s"(the new digest is $actualHashStr)")
  }

  /**
   * Extracts configFilePath from the first archive under rootFolder whose file name
   * contains componentName, returning the raw bytes (null if the entry is not found).
   */
  def unTarGz(rootFolder:String, componentName: String, configFilePath:String):Array[Byte] = {
    // Fix: the original declared these but then shadowed them with fresh locals
    // inside the try block (and `gzipCompressor` was a `val` fixed at null), so
    // the finally block always saw nulls and never closed any stream.
    var tarArchiveInputStream:TarArchiveInputStream = null
    var bufferInputStream:BufferedInputStream = null
    var gzipCompressor:GzipCompressorInputStream = null
    var confContent: Array[Byte] = null
    try {
      val libDir = new File(rootFolder)
      val pathInput = libDir.listFiles().filter(file => file.getName.contains(componentName))
      val path = Paths.get(pathInput(0).getAbsolutePath)
      bufferInputStream = new BufferedInputStream(Files.newInputStream(path))
      gzipCompressor = new GzipCompressorInputStream(bufferInputStream)
      tarArchiveInputStream = new TarArchiveInputStream(gzipCompressor)

      var archiveEntry: ArchiveEntry = null
      archiveEntry = tarArchiveInputStream.getNextEntry
      // Some archives start with a bare "./" root entry; skip it.
      if(archiveEntry.getName == "./")
        archiveEntry = tarArchiveInputStream.getNextEntry

      // Entry names may be "./"-prefixed; the first path segment is the extract folder.
      val extractFolder = archiveEntry.getName.replaceAll("^\\./","").split("/")(0)
      // Fix: the original wrapped only the `if` in breakable, so `break` never
      // exited the while loop and the whole archive was scanned after a match.
      breakable {
        while (archiveEntry != null) {
          if (archiveEntry.getName.replaceAll("^\\./","") == s"$extractFolder/$configFilePath") {
            confContent = IOUtils.toByteArray(tarArchiveInputStream)
            break
          }
          archiveEntry = tarArchiveInputStream.getNextEntry
        }
      }
    }
    finally {
      // Closing the outermost stream closes the whole chain; the inner streams
      // are closed individually only when construction failed part-way.
      if(tarArchiveInputStream != null)
        tarArchiveInputStream.close()
      else if(gzipCompressor != null)
        gzipCompressor.close()
      else if(bufferInputStream != null)
        bufferInputStream.close()
    }
    confContent
  }
}
Example 4
Source File: CompressedFiles.scala    From tensorflow_scala   with Apache License 2.0 5 votes vote down vote up
package org.platanios.tensorflow.data.utilities

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.utils.IOUtils

import java.io.{File, FileOutputStream, InputStream}
import java.nio.file.{Files, Path}
import java.util.zip.GZIPInputStream


object CompressedFiles {
  /** Decompresses a .tar.gz file into destinationPath. */
  def decompressTGZ(tgzFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTGZStream(Files.newInputStream(tgzFilePath), destinationPath, bufferSize)
  }

  /** Decompresses a plain .tar file into destinationPath. */
  def decompressTar(tarFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTarStream(Files.newInputStream(tarFilePath), destinationPath, bufferSize)
  }

  /** Decompresses a gzipped tar stream into destinationPath. */
  def decompressTGZStream(tgzStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTarStream(new GZIPInputStream(tgzStream), destinationPath, bufferSize)
  }

  /**
   * Extracts every file entry of a tar stream under destinationPath, creating
   * parent directories as needed. Directory entries themselves are skipped
   * (their files imply them). `bufferSize` is accepted for API compatibility;
   * copying is delegated to IOUtils.copy.
   */
  def decompressTarStream(tarStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    val inputStream = new TarArchiveInputStream(tarStream)
    try {
      var entry = inputStream.getNextTarEntry
      while (entry != null) {
        if (!entry.isDirectory) {
          val currentFile = new File(destinationPath.toAbsolutePath.toString, entry.getName)
          // Zip-slip guard: reject entries that would resolve outside the destination.
          if (!currentFile.getCanonicalPath.startsWith(destinationPath.toFile.getCanonicalPath))
            throw new java.io.IOException(s"Tar entry outside destination: ${entry.getName}")
          val parentFile = currentFile.getParentFile
          if (!parentFile.exists)
            parentFile.mkdirs()
          // Fix: the original never closed the FileOutputStream, leaking a
          // descriptor per extracted file.
          val out = new FileOutputStream(currentFile)
          try IOUtils.copy(inputStream, out)
          finally out.close()
        }
        entry = inputStream.getNextTarEntry
      }
    } finally {
      // Fix: close the archive stream even if extraction throws.
      inputStream.close()
    }
  }
}
Example 5
Source File: FileUtils.scala    From mimir   with Apache License 2.0 5 votes vote down vote up
package mimir.util

import java.lang.reflect.Method
import java.net.URL
import java.io.{File, FileOutputStream, BufferedOutputStream, InputStream}
import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import java.io.BufferedInputStream
import java.nio.file.Files

object FileUtils {
  /**
   * Lists the plain files directly inside `dir`; empty list when the directory
   * does not exist or is not a directory.
   */
  def getListOfFiles(dir: String):List[File] = {
    val d = new File(dir)
    if (d.exists && d.isDirectory) {
        // Fix: listFiles can return null on an I/O error even for an existing
        // directory; guard to avoid an NPE.
        Option(d.listFiles).map(_.filter(_.isFile).toList).getOrElse(List[File]())
    } else {
        List[File]()
    }
  }

  /**
   * Adds a JAR to the system classloader's classpath via the protected addURL
   * method. NOTE(review): relies on the system classloader being a
   * URLClassLoader — this works on Java 8 but fails on Java 9+; confirm the
   * target runtime.
   */
  def addJarToClasspath(jar: File): Unit = {
// Get the ClassLoader class
    val cl: ClassLoader = ClassLoader.getSystemClassLoader
    val clazz: Class[_] = cl.getClass
// Get the protected addURL method from the parent URLClassLoader class
    val method: Method =
      clazz.getSuperclass.getDeclaredMethod("addURL", Seq(classOf[URL]):_*)
// Run projected addURL method to add JAR to classpath
    method.setAccessible(true)
    method.invoke(cl, Seq(jar.toURI().toURL()):_*)
  }

  /**
   * Extracts a gzipped tar stream into destinationDir, creating the directory
   * tree as needed, and returns the destination directory.
   */
  def untar(in:InputStream, destinationDir: String): File = {

    val dest = new File(destinationDir)
    dest.mkdir()

    var tarIn: TarArchiveInputStream = null

    try {
      tarIn = new TarArchiveInputStream(
        new GzipCompressorInputStream(
          new BufferedInputStream(in)))
      var tarEntry = tarIn.getNextTarEntry
      while (tarEntry != null) {

        // create a file with the same name as the tarEntry
        val destPath = new File(dest, tarEntry.getName)
        // Zip-slip guard: refuse entries that resolve outside the destination.
        if (!destPath.getCanonicalPath.startsWith(dest.getCanonicalPath))
          throw new java.io.IOException(s"Tar entry outside destination: ${tarEntry.getName}")
        if (tarEntry.isDirectory) {
          destPath.mkdirs()
        } else {
          // Create any necessary parent dirs
          val parent = destPath.getParentFile
          if (!Files.exists(parent.toPath)) {
            parent.mkdirs()
          }

          destPath.createNewFile()

          val btoRead = new Array[Byte](1024)
          var bout: BufferedOutputStream = null

          try {
            bout = new BufferedOutputStream(new FileOutputStream(destPath))

            var len = 0
            while (len != -1) {
              len = tarIn.read(btoRead)
              if (len != -1) {
                bout.write(btoRead, 0, len)
              }
            }
          } finally {
            if (bout != null) {
              bout.close()
            }
          }
        }
        tarEntry = tarIn.getNextTarEntry
      }
    } finally {
      if (tarIn != null) {
        tarIn.close()
      }
    }
    dest
  }

}
Example 6
Source File: ArchiveUtils.scala    From dl4scala   with MIT License 5 votes vote down vote up
package org.dl4scala.util


import org.slf4j.LoggerFactory
import org.apache.commons.compress.archivers.tar.TarArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import java.io._
import java.util.zip.GZIPInputStream
import java.util.zip.ZipInputStream

      tarIn.close()
    }
    else if (file.endsWith(".gz")) {
      val is2 = new GZIPInputStream(fin)
      val extracted = new File(target.getParent, target.getName.replace(".gz", ""))
      if (extracted.exists) extracted.delete
      extracted.createNewFile
      val fos = FileUtils.openOutputStream(extracted)
      IOUtils.copyLarge(is2, fos)
      is2.close()
      fos.flush()
      fos.close()
    }
    target.delete
  }
} 
Example 7
Source File: DataUtilities.scala    From dl4scala   with MIT License 5 votes vote down vote up
package org.dl4scala.examples.utilities

import java.io._

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.slf4j.{Logger, LoggerFactory}


object DataUtilities {
  val logger: Logger = LoggerFactory.getLogger(DataUtilities.getClass)
  private val BUFFER_SIZE = 4096

  /**
   * Extracts a .tar.gz archive at filePath into outputPath, creating
   * directories as needed. Logs a "." every 1000 extracted files.
   */
  @throws(classOf[IOException])
  def extractTarGz(filePath: String, outputPath: String): Unit = {
    var fileCount = 0
    var dirCount = 0

    logger.info("Extracting files")

    val tais = new TarArchiveInputStream(new GzipCompressorInputStream(
      new BufferedInputStream(new FileInputStream(filePath))))
    try {
      // Read the tar entries using the getNextEntry method
      Stream.continually(tais.getNextTarEntry).takeWhile(_ != null).foreach { entry =>
        // Create directories as required
        if (entry.isDirectory) {
          new File(outputPath + "/" + entry.getName).mkdirs
          dirCount += 1
        } else {
          val outFile = new File(outputPath + "/" + entry.getName)
          // Fix: archives need not list parent directories before their files.
          Option(outFile.getParentFile).foreach(_.mkdirs())
          val data = new Array[Byte](BUFFER_SIZE)
          val fos = new FileOutputStream(outFile)
          val dest = new BufferedOutputStream(fos, BUFFER_SIZE)
          try {
            Stream.continually(tais.read(data, 0, BUFFER_SIZE)).takeWhile(_ != -1).foreach { count =>
              dest.write(data, 0, count)
            }
          } finally {
            // Fix: the stream leaked if a write threw.
            dest.close()
          }
          fileCount = fileCount + 1
          // Fix: the original checked the count outside the else branch, so it
          // logged on every directory entry while fileCount was still 0.
          if (fileCount % 1000 == 0) logger.info(".")
        }
      }
    } finally {
      // Fix: close the archive stream even if extraction throws.
      tais.close()
    }
  }
}
Example 8
Source File: TgzTransformerSpec.scala    From releaser   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.releaser

import java.io._
import java.nio.file.{Files, Path}

import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.scalatest._

import scala.collection.mutable.ListBuffer
import scala.util.{Failure, Success}

class TgzTransformerSpec extends WordSpec with Matchers with BeforeAndAfterEach with OptionValues with TryValues{

  // Fixture archive shipped with the test resources.
  val tgzPath = new File(this.getClass.getResource("/help-frontend/uk/gov/hmrc/help-frontend_2.11/1.26.0-3-gd7ed03c/help-frontend_2.11-1.26.0-3-gd7ed03c.tgz").toURI).toPath

  var transformer:TgzTransformer = _
  val candidate_1_26_0_3_gd7ed03c = ReleaseCandidateVersion("1.26.0-3-gd7ed03c")
  val release_1_4_0 = ReleaseVersion("1.4.0")
  var tmpDir:Path = _

  override def beforeEach(){
    tmpDir = Files.createTempDirectory("tmp")
    transformer = new TgzTransformer()
    FileUtils.copyFileToDirectory(tgzPath.toFile, tmpDir.toFile)
  }

  override def afterEach(){
    FileUtils.deleteDirectory(tmpDir.toFile)
  }

  "the transformer" should {

    "decompress the tgz, rename the main folder and compress it back" in {

      val inFile = new File(tmpDir.toFile, tgzPath.getFileName.toString).toPath
      val targetFilePath = tmpDir.resolve("help-frontend-1.4.0.tgz")

      // The pristine archive should contain the candidate folder but not the
      // release folder, and the start script should keep its exec mode bits.
      val entriesBefore = listTgzEntries(inFile)
      assertTarEntry(entriesBefore, "./help-frontend-1.26.0-3-gd7ed03c/")
      assertTarEntry(entriesBefore, "./help-frontend-1.4.0/", exists = false)
      assertTarEntry(entriesBefore, "./start-docker.sh", mode = Some(493))

      transformer(inFile, "help-frontend", candidate_1_26_0_3_gd7ed03c, release_1_4_0, targetFilePath) match {
        case Success(outFile) =>
          // After transformation the folder must be renamed and modes preserved.
          val entriesAfter = listTgzEntries(targetFilePath)
          assertTarEntry(entriesAfter, "./help-frontend-1.26.0-3-gd7ed03c/", exists = false)
          assertTarEntry(entriesAfter, "./help-frontend-1.4.0/")
          assertTarEntry(entriesAfter, "./start-docker.sh", mode = Some(493))
        case Failure(e) => fail("Caught exception: " + e.getMessage, e)
      }


    }
  }

  // Reads every entry header from a .tgz file, in archive order.
  private def listTgzEntries(localTgzFile: Path) : List[TarArchiveEntry] =  {
    val tarIn = new TarArchiveInputStream(
      new GzipCompressorInputStream(
        new BufferedInputStream(new FileInputStream(localTgzFile.toFile))))

    val collected = ListBuffer[TarArchiveEntry]()
    var current = tarIn.getNextTarEntry
    while (current != null) {
      collected += current
      current = tarIn.getNextTarEntry
    }

    tarIn.close()

    collected.toList

  }

  // Asserts presence/absence of an entry by exact name, optionally checking its mode.
  private def assertTarEntry(tarEntries: List[TarArchiveEntry], entryName: String, exists: Boolean = true, mode: Option[Int] = None) = {
    tarEntries.find(_.getName == entryName) match {
      case Some(found) =>
        exists shouldBe true
        mode.foreach(expected => expected shouldBe found.getMode)
      case None =>
        exists shouldBe false
    }

  }

}
Example 9
Source File: Tar.scala    From libisabelle   with Apache License 2.0 5 votes vote down vote up
package info.hupel.isabelle.setup

import java.net.URL
import java.nio.file._
import java.nio.file.attribute.PosixFilePermissions

import scala.util.Try

import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.lang3.SystemUtils


object Tar {

  // Permissions applied to extracted entries that carry an execute bit.
  val execPermissions = PosixFilePermissions.fromString("rwxr-xr-x")

  /** Opens a remote .tar.gz as a tar stream. */
  def download(url: URL): Try[TarArchiveInputStream] =
    Try(new TarArchiveInputStream(new GzipCompressorInputStream(url.openStream())))

  /**
   * Extracts the archive under `path` and returns the single root directory it
   * created; fails if extraction would escape `path`, would overwrite existing
   * files, or produces more than one root.
   */
  def extractTo(path: Path, tar: TarArchiveInputStream): Try[Path] = Try {
    def nextEntry(): Option[TarArchiveEntry] = Option(tar.getNextTarEntry())

    // Walks all entries, extracting each and accumulating the directories seen.
    @annotation.tailrec
    def extractAll(current: Option[TarArchiveEntry], acc: List[Path]): List[Path] = current match {
      case None =>
        acc.reverse
      case Some(e) =>
        val target = path.resolve(e.getName).normalize

        // Guard against path traversal and accidental overwrites before touching disk.
        if (!target.startsWith(path) || Files.exists(target, LinkOption.NOFOLLOW_LINKS))
          sys.error("malicious tar file or file already exists")
        else {
          Files.createDirectories(target.getParent)
          if (e.isDirectory)
            Files.createDirectory(target)
          else if (e.isSymbolicLink)
            Files.createSymbolicLink(target, Paths.get(e.getLinkName))
          else if (e.isLink)
            Files.createLink(target, path.resolve(Paths.get(e.getLinkName)))
          else if (e.isFile) {
            Files.copy(tar, target)
            if (!SystemUtils.IS_OS_WINDOWS && (e.getMode % 2 == 1))
              Files.setPosixFilePermissions(target, execPermissions)
          }
          else
            sys.error("unknown tar file entry")
        }

        val dirs = if (e.isDirectory) target :: Nil else Nil
        extractAll(nextEntry(), dirs ::: acc)
    }

    // Keep only the outermost directories, then demand exactly one root.
    val roots = extractAll(nextEntry(), Nil).foldLeft(List.empty[Path]) { (outer, dir) =>
      if (outer.exists(dir.startsWith)) outer else dir :: outer
    }
    roots match {
      case root :: Nil => root
      case _           => sys.error("untarring created more than one root directory")
    }
  }

}