org.apache.commons.io.FilenameUtils Scala Examples

The following examples show how to use org.apache.commons.io.FilenameUtils, drawn from several open-source Scala projects. Each example names the project and source file it was taken from.
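As a quick orientation: FilenameUtils is purely lexical, operating on the path string without ever touching the filesystem. A minimal standalone sketch of the methods the examples below rely on (expected results shown in comments, Unix-style separators assumed):

import org.apache.commons.io.FilenameUtils

FilenameUtils.getExtension("/opt/data/model.bin")   // "bin"
FilenameUtils.getBaseName("/opt/data/model.bin")    // "model"
FilenameUtils.getName("/opt/data/model.bin")        // "model.bin"
FilenameUtils.concat("/opt/data", "out/model.bin")  // "/opt/data/out/model.bin"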
Example 1
Source File: AttributesComputation.scala    From nexus   with Apache License 2.0
package ch.epfl.bluebrain.nexus.storage.attributes

import java.nio.file.{Files, Path}
import java.security.MessageDigest

import akka.http.scaladsl.model.HttpCharsets.`UTF-8`
import akka.http.scaladsl.model.MediaTypes.{`application/octet-stream`, `application/x-tar`}
import akka.http.scaladsl.model.{ContentType, MediaType, MediaTypes}
import akka.stream.Materializer
import akka.stream.scaladsl.{Keep, Sink}
import akka.util.ByteString
import cats.effect.Effect
import cats.implicits._
import ch.epfl.bluebrain.nexus.storage.File.{Digest, FileAttributes}
import ch.epfl.bluebrain.nexus.storage.StorageError.InternalError
import ch.epfl.bluebrain.nexus.storage._
import org.apache.commons.io.FilenameUtils

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

trait AttributesComputation[F[_], Source] {

  /** Computes the size, digest and media type of the file or folder at `path`. */
  def apply(path: Path, algorithm: String): F[FileAttributes]
}

object AttributesComputation {

  // `folderSource`, `fileSource` and `path.toAkkaUri` come from the
  // ch.epfl.bluebrain.nexus.storage package object; `sinkSize`, `sinkDigest`
  // and `detectMediaType` are helpers defined elsewhere in this file (elided).
  implicit def akkaAttributes[F[_]](implicit
      ec: ExecutionContext,
      mt: Materializer,
      F: Effect[F]
  ): AttributesComputation[F, AkkaSource] =
    (path: Path, algorithm: String) => {
      if (!Files.exists(path)) F.raiseError(InternalError(s"Path not found '$path'"))
      else
        Try(MessageDigest.getInstance(algorithm)) match {
          case Success(msgDigest) =>
            val isDir  = Files.isDirectory(path)
            val source = if (isDir) folderSource(path) else fileSource(path)
            source
              .alsoToMat(sinkSize)(Keep.right)
              .toMat(sinkDigest(msgDigest)) { (bytesF, digestF) =>
                (bytesF, digestF).mapN {
                  case (bytes, digest) => FileAttributes(path.toAkkaUri, bytes, digest, detectMediaType(path, isDir))
                }
              }
              .run()
              .to[F]
          case Failure(_)         => F.raiseError(InternalError(s"Invalid algorithm '$algorithm'."))
        }
    }
}
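The `sinkDigest` helper referenced above is elided on this page. A minimal sketch of a digest sink in the same Akka Streams style, using only the imports already shown; the project's own Digest type is replaced here by a plain algorithm/hex pair:

// Sketch: fold each ByteString chunk into the MessageDigest, then map the
// materialized value to (algorithm, hex digest) once the stream completes.
def sinkDigestSketch(msgDigest: MessageDigest)(implicit ec: ExecutionContext): Sink[ByteString, Future[(String, String)]] =
  Sink
    .fold(msgDigest) { (digest, chunk: ByteString) =>
      digest.update(chunk.asByteBuffer)
      digest
    }
    .mapMaterializedValue(_.map(d => (d.getAlgorithm, d.digest().map("%02x".format(_)).mkString)))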
Example 2
Source File: CCUtil.scala    From reforest   with Apache License 2.0
package reforest.util

import org.apache.commons.io.FilenameUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import reforest.TypeInfo
import reforest.data.load.{ARFFUtil, DataLoad, LibSVMUtil}
import reforest.rf.RFCategoryInfo
import reforest.rf.parameter.RFParameter

import scala.reflect.ClassTag


// The enclosing declaration was stripped when this snippet was extracted;
// an object wrapper is assumed here so the example stands alone.
object CCUtil {

  def getDataLoader[T: ClassTag, U: ClassTag](property: RFParameter,
                                              typeInfo: Broadcast[TypeInfo[T]],
                                              instrumented: Broadcast[GCInstrumented],
                                              categoryInfo: Broadcast[RFCategoryInfo]): DataLoad[T, U] = {
    // Extension of the dataset file (e.g. "LIBSVM", "ARFF"); note that the
    // dispatch below keys on the explicitly configured property.fileType.
    val extension = FilenameUtils.getExtension(property.dataset).toUpperCase()

    property.fileType match {
      case "LIBSVM" => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
      case "SVM"    => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
      case "ARFF"   => new ARFFUtil(typeInfo, instrumented, categoryInfo)
      case _        => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
    }
  }
}
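For reference, the extension check above behaves like this (standalone sketch; only getExtension is Commons IO, the dispatch mirrors the example):

val ext = FilenameUtils.getExtension("/data/train.libsvm").toUpperCase() // "LIBSVM"
ext match {
  case "LIBSVM" | "SVM" => println("LibSVM loader")
  case "ARFF"           => println("ARFF loader")
  case _                => println("default loader")
}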
Example 3
Source File: Watcher.scala    From seed   with Apache License 2.0
package seed.cli.util

import java.nio.file.{Files, Path, StandardWatchEventKinds, WatchEvent}

import org.slf4j.LoggerFactory
import zio._
import zio.stream._
import io.methvin.watcher.DirectoryChangeEvent
import io.methvin.watcher.DirectoryChangeEvent.EventType
import io.methvin.watcher.DirectoryChangeListener
import io.methvin.watcher.DirectoryWatcher
import io.methvin.watcher.hashing.FileHasher
import org.apache.commons.io.FilenameUtils
import org.slf4j.Logger

import scala.collection.JavaConverters
import scala.concurrent.ExecutionContext

object Watcher {
  val Extensions = Array("scala", "java")

  // System.setProperty(org.slf4j.impl.SimpleLogger.DEFAULT_LOG_LEVEL_KEY, "TRACE")

  def watchPaths(
    paths: List[Path],
    onStarted: () => Unit = () => ()
  ): Stream[Throwable, Unit] =
    Stream.effectAsyncM[Throwable, Unit] { e =>
      val logger = LoggerFactory.getLogger("watcher")
      val (p, f) = paths.partition(Files.isDirectory(_))
      val watcher = new CustomRecursiveFileMonitor(p, f, logger = logger) {
        override def onCreate(file: Path, count: Int): Unit =
          if (Extensions.contains(FilenameUtils.getExtension(file.toString)))
            e(Task.succeed(()))
        override def onModify(file: Path, count: Int): Unit =
          if (Extensions.contains(FilenameUtils.getExtension(file.toString)))
            e(Task.succeed(()))
        override def onDelete(file: Path, count: Int): Unit = {}
      }

      Task.descriptorWith { d =>
        val ec = d.executor.asEC
        UIO {
          watcher.start()(ec)
          onStarted()
        }.onTermination(_ => UIO(watcher.close()))
      }
    }
}


abstract class CustomRecursiveFileMonitor(
  val paths: List[Path],
  val files: List[Path],
  val fileHasher: Option[FileHasher] = Some(FileHasher.DEFAULT_FILE_HASHER),
  val logger: Logger
) {
  protected[this] val watcher: DirectoryWatcher = DirectoryWatcher.builder
    .paths(JavaConverters.seqAsJavaListConverter(paths).asJava)
    .files(JavaConverters.seqAsJavaListConverter(files).asJava)
    .listener(new DirectoryChangeListener {
      override def onEvent(event: DirectoryChangeEvent): Unit =
        event.eventType match {
          case EventType.OVERFLOW =>
          case et =>
            CustomRecursiveFileMonitor.this.onEvent(
              et.getWatchEventKind.asInstanceOf[WatchEvent.Kind[Path]],
              event.path,
              event.count
            )
        }
      override def onException(e: Exception): Unit = e.printStackTrace()
    })
    .fileHasher(fileHasher.orNull)
    .logger(logger)
    .build()

  def onEvent(eventType: WatchEvent.Kind[Path], file: Path, count: Int): Unit =
    eventType match {
      case StandardWatchEventKinds.ENTRY_CREATE => onCreate(file, count)
      case StandardWatchEventKinds.ENTRY_MODIFY => onModify(file, count)
      case StandardWatchEventKinds.ENTRY_DELETE => onDelete(file, count)
    }

  def start()(implicit executionContext: ExecutionContext): Unit =
    executionContext.execute(() => watcher.watch())

  def close(): Unit = watcher.close()

  def onCreate(file: Path, count: Int): Unit
  def onModify(file: Path, count: Int): Unit
  def onDelete(file: Path, count: Int): Unit
} 
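A minimal way to drive the watcher (a sketch: assumes a zio.Runtime named `runtime` is in scope and that the ZIO version matches the one used above):

import java.nio.file.Paths

// Emit a log line every time a .scala or .java file is created or modified.
val watch = Watcher
  .watchPaths(List(Paths.get("src/main/scala")))
  .foreach(_ => zio.UIO(println("source change detected")))

runtime.unsafeRun(watch)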
Example 4
Source File: HadoopUtils.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.ml.image

import scala.language.existentials
import scala.util.Random

import org.apache.commons.io.FilenameUtils
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import org.apache.spark.sql.SparkSession

private object RecursiveFlag {

  /**
   * Runs `f` with the Hadoop path filter temporarily set to SamplePathFilter
   * (defined elsewhere in this file) so input files are sampled with
   * probability `sampleRatio`; the previous filter is restored afterwards.
   */
  def withPathFilter[T](
      sampleRatio: Double,
      spark: SparkSession,
      seed: Long)(f: => T): T = {
    val sampleImages = sampleRatio < 1
    if (sampleImages) {
      val flagName = FileInputFormat.PATHFILTER_CLASS
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      val old = Option(hadoopConf.getClass(flagName, null))
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio)
      hadoopConf.setLong(SamplePathFilter.seedParam, seed)
      hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter])
      try f finally {
        hadoopConf.unset(SamplePathFilter.ratioParam)
        hadoopConf.unset(SamplePathFilter.seedParam)
        old match {
          case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
          case None => hadoopConf.unset(flagName)
        }
      }
    } else {
      f
    }
  }
} 
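A hypothetical call site for withPathFilter (the path and ratio are placeholders; binaryFiles goes through FileInputFormat, so the installed SamplePathFilter applies):

// Sample roughly 10% of the input files for this one read; the previous
// PathFilter is restored by withPathFilter afterwards.
val sampledCount = RecursiveFlag.withPathFilter(0.1, spark, seed = 42L) {
  spark.sparkContext.binaryFiles("/data/images").count()
}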
Example 5
Source File: EarlyStoppingMNIST.scala    From dl4scala   with MIT License
package org.dl4scala.examples.misc.earlystopping

import java.util.Collections
import java.util.concurrent.TimeUnit

import org.apache.commons.io.FilenameUtils
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator
import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration
import org.deeplearning4j.earlystopping.saver.LocalFileModelSaver
import org.deeplearning4j.earlystopping.scorecalc.DataSetLossCalculator
import org.deeplearning4j.earlystopping.termination.{MaxEpochsTerminationCondition, MaxTimeIterationTerminationCondition}
import org.deeplearning4j.earlystopping.trainer.EarlyStoppingTrainer
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.inputs.InputType
import org.deeplearning4j.nn.conf.layers.{ConvolutionLayer, DenseLayer, OutputLayer, SubsamplingLayer}
import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater}
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions
import scala.collection.JavaConverters._
import java.util

object EarlyStoppingMNIST {
  def main(args: Array[String]): Unit = {
    // Configure network:
    val nChannels = 1
    val outputNum = 10
    val batchSize = 25
    val iterations = 1
    val seed = 123

    val configuration = new NeuralNetConfiguration.Builder()
      .seed(seed)
      .iterations(iterations)
      .regularization(true).l2(0.0005)
      .learningRate(0.02)
      .weightInit(WeightInit.XAVIER)
      .activation(Activation.RELU)
      .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
      .updater(Updater.NESTEROVS)
      .list()
      .layer(0, new ConvolutionLayer.Builder(5, 5)
        .nIn(nChannels)
        .stride(1, 1)
        .nOut(20).dropOut(0.5)
        .build())
      .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(2, 2)
        .stride(2, 2)
        .build())
      .layer(2, new DenseLayer.Builder()
        .nOut(500).build())
      .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .nOut(outputNum)
        .activation(Activation.SOFTMAX)
        .build())
      .setInputType(InputType.convolutionalFlat(28, 28, 1)) //See note in LenetMnistExample
      .backprop(true).pretrain(false).build()

    // Get data:
    val mnistTrain1024 = new MnistDataSetIterator(batchSize, 1024, false, true, true, 12345)
    val mnistTest512 = new MnistDataSetIterator(batchSize, 512, false, false, true, 12345)


    val tempDir = System.getProperty("java.io.tmpdir")
    val exampleDirectory = FilenameUtils.concat(tempDir, "DL4JEarlyStoppingExample/")
    val saver = new LocalFileModelSaver(exampleDirectory)

    val esConf = new EarlyStoppingConfiguration.Builder()
      .epochTerminationConditions(new MaxEpochsTerminationCondition(50)) //Max of 50 epochs
      .evaluateEveryNEpochs(1)
      .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES)) //Max of 20 minutes
      .scoreCalculator(new DataSetLossCalculator(mnistTest512, true))     //Calculate test set score
      .modelSaver(saver)
      .build()

    val trainer = new EarlyStoppingTrainer(esConf, configuration, mnistTrain1024)

    // Conduct early stopping training:
    val result = trainer.fit()
    println("Termination reason: " + result.getTerminationReason)
    println("Termination details: " + result.getTerminationDetails)
    println("Total epochs: " + result.getTotalEpochs)
    println("Best epoch number: " + result.getBestModelEpoch)
    println("Score at best epoch: " + result.getBestModelScore)

    //Print score vs. epoch
    val scoreVsEpoch = result.getScoreVsEpoch

    val list = new util.ArrayList[Integer](scoreVsEpoch.keySet)
    Collections.sort(list)
    System.out.println("Score vs. Epoch:")
    for (i <- list.asScala) {
      println(i + "\t" + scoreVsEpoch.get(i))
    }
  }
} 
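FilenameUtils.concat, used above to build exampleDirectory, joins and normalizes paths lexically; a standalone sketch (Unix-style separators):

FilenameUtils.concat("/tmp", "DL4JEarlyStoppingExample/")  // "/tmp/DL4JEarlyStoppingExample/"
FilenameUtils.concat("/tmp/a", "../b/model.bin")           // "/tmp/b/model.bin"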
Example 6
Source File: HadoopUtils.scala    From spark-images   with Apache License 2.0
package org.apache.spark.image

import java.nio.file.Paths

import org.apache.commons.io.FilenameUtils

import scala.sys.process._
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.sql.SparkSession
import scala.language.existentials
import scala.util.Random

object RecursiveFlag {

  /**
   * Sets (or, when `value` is None, clears) Hadoop's PATHFILTER_CLASS flag,
   * optionally recording the sample ratio read by SamplePathFilter, and
   * returns the previously configured filter class so it can be restored.
   */
  def setPathFilter(value: Option[Class[_]], sampleRatio: Option[Double] = None, spark: SparkSession)
  : Option[Class[_]] = {
    val flagName = FileInputFormat.PATHFILTER_CLASS
    val hadoopConf = spark.sparkContext.hadoopConfiguration
    val old = Option(hadoopConf.getClass(flagName, null))
    if (sampleRatio.isDefined) {
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio.get)
    } else {
      hadoopConf.unset(SamplePathFilter.ratioParam)
      None
    }

    value match {
      case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
      case None => hadoopConf.unset(flagName)
    }
    old
  }
} 
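A hypothetical save-and-restore usage of setPathFilter (SamplePathFilter is defined elsewhere in the same file; spark is an active SparkSession):

// Install the sampling filter, run one read, then restore whatever filter
// was configured before.
val previous = RecursiveFlag.setPathFilter(Some(classOf[SamplePathFilter]), Some(0.1), spark)
try {
  spark.sparkContext.binaryFiles("/data/images").count()
} finally {
  RecursiveFlag.setPathFilter(previous, None, spark)
}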
Example 7
Source File: SparkSqlRunner.scala    From amaterasu   with Apache License 2.0
package org.apache.amaterasu.executor.execution.actions.runners.spark.SparkSql

import java.io.File

import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.common.logging.Logging
import org.apache.amaterasu.common.runtime.Environment
import org.apache.commons.io.FilenameUtils
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, SparkSession}



// The class declaration and its fields were truncated when this snippet was
// extracted; they are reconstructed here from the assignments performed in
// the companion object's apply below.
class SparkSqlRunner extends Logging {

  var env: Environment = _
  var jobId: String = _
  var actionName: String = _
  var notifier: Notifier = _
  var sc: SparkContext = _
  var spark: SparkSession = _

  def findFileType(folderName: File): Array[String] = {
    // get all the files from a directory
    val files: Array[File] = folderName.listFiles()
    val extensions: Array[String] = files.map(file => FilenameUtils.getExtension(file.toString))
    extensions
  }

}

object SparkSqlRunner {

  def apply(env: Environment,
            jobId: String,
            actionName: String,
            notifier: Notifier,
            sc: SparkContext): SparkSqlRunner = {

    val sparkSqlRunnerObj = new SparkSqlRunner

    sparkSqlRunnerObj.env = env
    sparkSqlRunnerObj.jobId = jobId
    sparkSqlRunnerObj.actionName = actionName
    sparkSqlRunnerObj.notifier = notifier
    sparkSqlRunnerObj.sc = sc
    sparkSqlRunnerObj.spark = SparkSession.builder().config(sc.getConf).enableHiveSupport().getOrCreate()
    sparkSqlRunnerObj
  }
}
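A hypothetical call site (env, notifier and sc are assumed to be available from the surrounding job):

val runner = SparkSqlRunner(env, "job-1", "sql-action", notifier, sc)
val extensions = runner.findFileType(new File("/data/input"))
println(extensions.distinct.mkString(", "))  // e.g. "parquet, json"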