java.text.NumberFormat Scala Examples

The following examples show how to use java.text.NumberFormat. Each example notes the original project and source file it was taken from.
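
As a quick orientation before the project examples, here is a minimal, self-contained sketch of the NumberFormat calls that recur below: locale-aware integer formatting, percent formatting, and parsing. The object name and sample values are illustrative only.

import java.text.NumberFormat
import java.util.Locale

object NumberFormatQuickStart extends App {
  // Integer formatting with locale-dependent grouping separators.
  val integers = NumberFormat.getIntegerInstance(Locale.US)
  println(integers.format(1234567L))                // 1,234,567

  // Percent formatting; raising the minimum fraction digits also raises the maximum.
  val percent = NumberFormat.getPercentInstance(Locale.US)
  percent.setMinimumFractionDigits(2)
  println(percent.format(0.42))                     // 42.00%

  // Parsing a locale-formatted string back into a java.lang.Number.
  println(NumberFormat.getInstance(Locale.US).parse("1,234.5").doubleValue()) // 1234.5
}
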
Example 1
Source File: SQLMetrics.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}


class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}


object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  
  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.ENGLISH)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
} 
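
A minimal sketch of how the stringValue helper above behaves, assuming the Spark build from this example is on the classpath; the metric-type strings are the constants defined in the object ("sum", "size", "timing").

import org.apache.spark.sql.execution.metric.SQLMetrics

object SQLMetricsStringValueDemo extends App {
  // "sum" metrics render the total with locale-aware grouping separators.
  println(SQLMetrics.stringValue("sum", Seq(100L, 2000L, 30000L)))           // 32,100

  // "size" metrics drop negative placeholder values and render
  // "\ntotal (min, med, max)" using Spark's byte-size formatting.
  println(SQLMetrics.stringValue("size", Seq(-1L, 1024L, 4096L, 1048576L)))
}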
Example 2
Source File: NumericPartialFunctions.scala    From dsentric   with Apache License 2.0
package dsentric

import java.text.NumberFormat

object NumericPartialFunctions {

  private val numericRegex = "^[\\+-]{0,1}(\\d{1,3}[\\.', ](\\d{3}[\\.', ])*\\d{3}([\\.,]\\d*)?|\\d*([\\.,]\\d*)?)$".r
  private val numberFormat = NumberFormat.getInstance()
  private def isNumeric(s:String) =
    !s.isEmpty && numericRegex.pattern.matcher(s).matches() && s != "-" && s != "+" && s != "." && s != ","

  def byte:PartialFunction[Any, Byte] = {
    case n:Byte => n
    case n:Short if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n:Int if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n:Long if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n:Double if n % 1 == 0 && n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n:Float if n % 1 == 0 && n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
  }

  def short:PartialFunction[Any, Short] = {
    case n:Byte => n
    case n:Short => n
    case n:Int if n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n:Long if n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n:Double if n % 1 == 0 && n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n:Float if n % 1 == 0 && n <= Short.MaxValue && n >= Short.MinValue => n.toShort
  }

  def int:PartialFunction[Any, Int] = {
    case n:Int => n
    case n:Long if n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n:Double if n % 1 == 0 && n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n:Float if n % 1 == 0 && n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n:Short => n
    case n:Byte => n
  }

  def long:PartialFunction[Any, Long] = {
    case n:Int => n
    case n:Long => n
    case n:Double if n % 1 == 0 && n <= Long.MaxValue && n >= Long.MinValue => n.toLong
    case n:Float if n % 1 == 0 && n <= Long.MaxValue && n >= Long.MinValue => n.toLong
    case n:Short => n
    case n:Byte => n
  }

  def float:PartialFunction[Any, Float] = {
    case n:Float => n
    case n:Int => n
    case n:Long => n
    case n:Double if n <= Float.MaxValue && n >= Float.MinValue => n.toFloat
    case n:Short => n
    case n:Byte => n
  }

  def double:PartialFunction[Any, Double] = {
    case n:Double => n
    case n:Long => n
    case n:Float => n
    case n:Int => n
    case n:Short => n
    case n:Byte => n
  }

  def number:PartialFunction[Any, Number] = {
    case n:Double => n
    case n:Long => n
    case n:Float => n
    case n:Int => n
    case n:Short => n
    case n:Byte => n
  }

  def stringDouble:PartialFunction[Any, Double] = {
    case s:String if isNumeric(s) =>
      numberFormat.parse(s).doubleValue()
  }
} 
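
A minimal sketch of how these partial functions compose with lift, assuming the dsentric artifact from this example is on the classpath. The sample inputs are illustrative, and the stringDouble result depends on the JVM's default locale because parsing goes through NumberFormat.getInstance().

import dsentric.NumericPartialFunctions

object NumericCastDemo extends App {
  val inputs: Seq[Any] = Seq(42, 3.5d, "1,234.5", "oops")

  // lift turns each PartialFunction[Any, T] into Any => Option[T]; unmatched inputs yield None.
  println(inputs.map(NumericPartialFunctions.int.lift))          // List(Some(42), None, None, None)
  println(inputs.map(NumericPartialFunctions.double.lift))       // List(Some(42.0), Some(3.5), None, None)
  println(inputs.map(NumericPartialFunctions.stringDouble.lift)) // List(None, None, Some(1234.5), None) in an English-style locale
}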
Example 3
Source File: JobProgressUtil.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.engine.spark.utils

import java.text.NumberFormat

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo
import org.apache.commons.lang.time.DateFormatUtils
import org.apache.spark.{JobExecutionStatus, SparkContext, SparkJobInfo}


object JobProgressUtil extends Logging{
  def progress(sc: SparkContext, jobGroup : String):Float = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val jobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
    val stages = jobs.flatMap { job =>
      job.stageIds().flatMap(sc.statusTracker.getStageInfo)
    }

    val taskCount = stages.map(_.numTasks).sum
    val completedTaskCount = stages.map(_.numCompletedTasks).sum
    if (taskCount == 0) {
      0f
    } else {
      (completedTaskCount.toDouble / taskCount).toFloat
    }
  }

  def getActiveJobProgressInfo(sc:SparkContext,jobGroup : String):Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val activeJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }.filter(_.status() == JobExecutionStatus.RUNNING)
    val progressInfos = activeJobs.map { job =>
      val jobProgressInfo = getJobProgressInfoByStages(job, sc, jobGroup)
      val timestamp = DateFormatUtils.format(System.currentTimeMillis, "yyyy-MM-dd HH:mm:ss")
      val progress = jobProgressInfo.succeedTasks * 1d /  jobProgressInfo.totalTasks
      info(s"${jobProgressInfo.id} numTasks = ${jobProgressInfo.totalTasks}, numCompletedTasks = ${jobProgressInfo.succeedTasks}," +
        s" numActiveTasks = ${jobProgressInfo.runningTasks} , completed:${percentageFormat(progress)}")
      jobProgressInfo
    }
    progressInfos
  }

  def getCompletedJobProgressInfo(sc:SparkContext,jobGroup : String):Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val completedJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }.filter(_.status() == JobExecutionStatus.SUCCEEDED)
    val progressInfos = completedJobs.map { job =>
      getJobProgressInfoByStages(job, sc, jobGroup)
    }
    progressInfos
  }

  private  def getJobProgressInfoByStages(job:SparkJobInfo, sc:SparkContext, jobGroup : String) : JobProgressInfo = {
    val stages = job.stageIds().flatMap(sc.statusTracker.getStageInfo)

    var numTasks = 0
    var numActiveTasks = 0
    var numFailedTasks = 0
    var numSucceedTasks = 0
    stages.foreach{stageInfo =>
      if (stageInfo.submissionTime() > 0){
        numTasks += stageInfo.numTasks()
        numActiveTasks += stageInfo.numActiveTasks()
        numFailedTasks += stageInfo.numFailedTasks()
        numSucceedTasks += stageInfo.numCompletedTasks()
      }
    }
    JobProgressInfo(getJobId(job.jobId(), jobGroup), numTasks, numActiveTasks, numFailedTasks, numSucceedTasks)
  }

  private def getJobId( jobId : Int , jobGroup : String ): String = "jobId-" + jobId + "(" + jobGroup + ")"

  private var _percentFormat: NumberFormat = _

  def percentageFormat(decimal: Double): String = {
    if(_percentFormat == null) {
      _percentFormat = NumberFormat.getPercentInstance
      _percentFormat.setMinimumFractionDigits(2)
    }
    _percentFormat.format(decimal)
  }
} 
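
percentageFormat above lazily builds a percent formatter with at least two fraction digits. The following standalone sketch shows the same formatting without the Spark and Linkis dependencies; output depends on the JVM's default locale.

import java.text.NumberFormat

object ProgressFormatDemo extends App {
  private val percentFormat = NumberFormat.getPercentInstance
  percentFormat.setMinimumFractionDigits(2)

  // Same expression as the progress log line above: succeedTasks * 1d / totalTasks.
  val succeedTasks = 151
  val totalTasks = 200
  println(percentFormat.format(succeedTasks * 1d / totalTasks))  // e.g. 75.50%
}

Note that NumberFormat instances are not thread-safe; Example 12 below shows one way of dealing with that by keeping one instance per thread.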
Example 4
Source File: TypeCast.scala    From spark-select   with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try


object TypeCast {
  // The rest of the original object is omitted from this excerpt; only toChar is shown.

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1)
      match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
} 
Example 5
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
  path:       String,
  dataSchema: StructType,
  context:    TaskAttemptContext, options: Map[String, String]) extends OutputWriter {

  // Fields referenced below (recordWriter, useHeader, currentRowNum, defaultSheetName,
  // simpleConverter) are initialized in setup code omitted from this excerpt.

  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size>0) {
      var currentColumnNum = 0;
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName)
        } else {
          simpleObject(i)=obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x<- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }

} 
Example 6
Source File: package.scala    From CMAK   with Apache License 2.0
package kafka.manager

import java.nio.charset.StandardCharsets
import java.text.NumberFormat


package object utils {
  import org.json4s._
  import org.json4s.jackson.JsonMethods._
  import org.json4s.jackson.Serialization.{read, write}
  implicit val formats = DefaultFormats
  private[this] val numberFormat = NumberFormat.getInstance()
  
  implicit class LongFormatted(val x: Long) {
    def formattedAsDecimal = numberFormat.format(x)  
  }

  implicit def serializeString(data: String) : Array[Byte] = {
    data.getBytes(StandardCharsets.UTF_8)
  }

  implicit def deserializeString(data: Array[Byte]) : String  = {
    new String(data, StandardCharsets.UTF_8)
  }

  def toJson(map: Map[String, Any]): String = {
    write(map)
  }
  
  def toJson(s: String) : String = {
    "\"" + s + "\""
  }

  def fromJson[T](s: String) : T = {
    read(s)
  }

  def parseJson(s: String) : JValue = {
    parse(s)
  }

  @throws[UtilException]
  def checkCondition(cond: Boolean, error: UtilError) : Unit = {
    if(!cond) {
      throw new UtilException(error)
    }
  }

  @throws[UtilException]
  def throwError [T] (error: UtilError) : T = {
    throw new UtilException(error)
  }
} 
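
A minimal sketch of the LongFormatted enrichment above, assuming the kafka.manager.utils package from this example is on the classpath; the value is illustrative.

import kafka.manager.utils._

object FormattedDemo extends App {
  // The implicit class adds formattedAsDecimal to Long, backed by a default-locale NumberFormat.
  println(1234567L.formattedAsDecimal)  // e.g. 1,234,567 in an English-style locale
}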
Example 7
Source File: SQLMetrics.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}


class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}


object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  
  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.US)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
} 
Example 8
Source File: JVMUtil.scala    From Argus-SAF   with Apache License 2.0
package org.argus.jawa.core.util

import java.io.{BufferedReader, InputStreamReader}
import java.net.URLClassLoader
import java.text.NumberFormat

 
object JVMUtil {
	def startSecondJVM[C](clazz: Class[C], jvmArgs: List[String], args: List[String], redirectStream: Boolean): Int = {
    val separator = System.getProperty("file.separator")
    val classpath = Thread.currentThread().getContextClassLoader.asInstanceOf[URLClassLoader].getURLs.map(_.getPath()).reduce((c1, c2) => c1 + java.io.File.pathSeparator + c2)
    val path = System.getProperty("java.home") + separator + "bin" + separator + "java"
    val commands: IList[String] = List(path) ::: jvmArgs ::: List("-cp", classpath, clazz.getCanonicalName.stripSuffix("$")) ::: args
    import scala.collection.JavaConverters._
    val processBuilder = new ProcessBuilder(commands.asJava)
    processBuilder.redirectErrorStream(redirectStream)
    val process = processBuilder.start()
    val is = process.getInputStream
    val isr = new InputStreamReader(is)
    val br = new BufferedReader(isr)
    var line = br.readLine()
    while (line != null) {
      println(line)
      line = br.readLine()
    }
    process.waitFor()
  }
  
  def showMemoryUsage(): Unit = {
    val runtime = Runtime.getRuntime
    val format = NumberFormat.getInstance()
    
    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()
    
    sb.append("free memory: " + format.format(freeMemory / 1024 / 1024) + " ")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024 / 1024) + " ")
    sb.append("max memory: " + format.format(maxMemory / 1024 / 1024) + " ")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024 / 1024) + " ")
    println(sb.toString())
  }
} 
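
A usage sketch for the two helpers above, assuming org.argus.jawa.core.util is on the classpath and a Java 8 runtime (startSecondJVM discovers the classpath by casting the context class loader to URLClassLoader, which fails on newer JVMs). EchoMain is a hypothetical top-level object defined only for this demo.

import org.argus.jawa.core.util.JVMUtil

object EchoMain {
  def main(args: Array[String]): Unit = println(args.mkString(" "))
}

object JVMUtilDemo extends App {
  // Launch EchoMain in a second JVM with a 512 MB heap and echo its stdout back.
  val exitCode = JVMUtil.startSecondJVM(EchoMain.getClass, List("-Xmx512m"), List("hello", "world"), redirectStream = true)
  println(s"child JVM exited with $exitCode")

  // Print the current JVM's heap statistics, formatted with NumberFormat grouping.
  JVMUtil.showMemoryUsage()
}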
Example 9
Source File: SQLMetrics.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}


class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}


object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  
  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.US)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
} 
Example 10
Source File: TypeCast.scala    From mimir   with Apache License 2.0
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
Example 11
Source File: SandboxApp.scala    From bloom-filter-scala   with MIT License
import java.text.NumberFormat

import bloomfilter.mutable.{CuckooFilter, UnsafeTable8Bit}
import com.google.monitoring.runtime.instrumentation.{AllocationRecorder, Sampler}
import com.twitter.algebird.{BloomFilter => AlgebirdBloomFilter}

import scala.util.Random

object SandboxApp {

  def checkMemory(): Unit = {
    val runtime = Runtime.getRuntime

    val format = NumberFormat.getInstance()

    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()

    sb.append("free memory: " + format.format(freeMemory / 1024) + "\n")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024) + "\n")
    sb.append("max memory: " + format.format(maxMemory / 1024) + "\n")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n")
    System.out.println(sb.toString())
  }


  def main(args: Array[String]): Unit = {

    val sut = CuckooFilter[Long](1000)
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10)
    assert(sut.mightContain(10))
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10000)
    assert(sut.mightContain(10000))

  }

  def compareAlgebirdFPR(): Unit = {

    val random: Random = new Random()

    val itemsExpected = 10000L
    val falsePositiveRate = 0.1
    var bf = AlgebirdBloomFilter(itemsExpected.toInt, falsePositiveRate, 0).create("")
    val bf2 = bloomfilter.mutable.BloomFilter[String](itemsExpected, falsePositiveRate)

    var i = 0
    while (i < itemsExpected) {
      val str: String = random.nextString(1000)
      bf = bf.+(str)
      bf2.add(str)
      i += 1
    }

    i = 0
    var in, in2 = 0
    while (true) {
      val str = random.nextString(1000)
      if (bf.contains(str).isTrue) {
        in += 1
      }
      if (bf2.mightContain(str)) {
        in2 += 1
      }

      if (i % 1000 == 0) {
        println(s"in: $in; in2: $in2")
      }
    }


  }

  def checkAllocations(): Unit = {
    val sampler: Sampler = new Sampler() {
      def sampleAllocation(count: Int, desc: String, newObj: Object, size: Long) {
        System.out.println("I just allocated the object " + newObj +
          " of type " + desc + " whose size is " + size)
        if (count != -1) {
          System.out.println("It's an array of size " + count)
        }
      }
    }

    AllocationRecorder.addSampler(sampler)

    AllocationRecorder.removeSampler(sampler)

  }
} 
Example 12
Source File: ThreadLocalNumberFormat.scala    From perfolation   with MIT License
package perfolation

import java.math.RoundingMode
import java.text.NumberFormat
import java.util.Currency

object ThreadLocalNumberFormat {
  // Make sure the platform is initialized
  Platform

  private val threadLocalNumberFormat = new ThreadLocal[NumberFormat]{
    override protected def initialValue(): NumberFormat = NumberFormat.getInstance()
  }

  protected[perfolation] def apply(i: Int,
                                   f: Int,
                                   maxI: Int,
                                   maxF: Int,
                                   g: Boolean,
                                   c: Option[Currency],
                                   rm: RoundingMode): NumberFormat = {
    val nf = threadLocalNumberFormat.get()
    nf.setGroupingUsed(g)
    c.foreach(nf.setCurrency)
    nf.setMaximumFractionDigits(if (maxF == -1) f else maxF)
    nf.setMinimumFractionDigits(f)
    nf.setMaximumIntegerDigits(if (maxI == -1) i else maxI)
    nf.setMinimumIntegerDigits(i)
    nf.setParseIntegerOnly(maxF == 0)
    nf.setRoundingMode(rm)
    nf
  }

} 
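
Because a NumberFormat instance must not be shared across threads without external synchronization, the helper above keeps one instance per thread and reconfigures it on every call. Its apply method is package-private to perfolation, so the standalone sketch below reproduces the pattern with hypothetical names.

import java.text.NumberFormat

object PerThreadFormat {
  private val local = new ThreadLocal[NumberFormat] {
    override protected def initialValue(): NumberFormat = NumberFormat.getInstance()
  }

  // Reconfigure the calling thread's own formatter and use it; no locking needed.
  def format(value: Double, fractionDigits: Int, grouping: Boolean): String = {
    val nf = local.get()
    nf.setGroupingUsed(grouping)
    nf.setMinimumFractionDigits(fractionDigits)
    nf.setMaximumFractionDigits(fractionDigits)
    nf.format(value)
  }
}

// e.g. PerThreadFormat.format(1234.5, 2, grouping = true) == "1,234.50" in an English-style locale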
Example 13
Source File: PlyOutputWriter.scala    From spark-iqmulus   with Apache License 2.0
package fr.ign.spark.iqmulus.ply

import org.apache.spark.sql.types._
import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext, JobContext }
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
import java.io.DataOutputStream
import org.apache.spark.sql.sources.OutputWriter
import org.apache.hadoop.io.{ NullWritable, BytesWritable }
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.fs.Path
import java.text.NumberFormat
import org.apache.spark.sql.{ Row, SQLContext, sources }
import fr.ign.spark.iqmulus.RowOutputStream

class PlyOutputWriter(
  name: String,
  context: TaskAttemptContext,
  dataSchema: StructType,
  element: String,
  littleEndian: Boolean
)
    extends OutputWriter {

  private val file = {
    val path = getDefaultWorkFile(s".ply.$element")
    val fs = path.getFileSystem(context.getConfiguration)
    fs.create(path)
  }

  private var count = 0L

  // strip out ids
  private val schema = StructType(dataSchema.filterNot { Seq("fid", "pid") contains _.name })

  private val recordWriter = new RowOutputStream(new DataOutputStream(file), littleEndian, schema, dataSchema)

  def getDefaultWorkFile(extension: String): Path = {
    val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID")
    val taskAttemptId: TaskAttemptID = context.getTaskAttemptID
    val split = taskAttemptId.getTaskID.getId
    new Path(name, f"$split%05d-$uniqueWriteJobId$extension")
  }

  override def write(row: Row): Unit = {
    recordWriter.write(row)
    count += 1
  }

  override def close(): Unit = {
    recordWriter.close

    // write header
    val path = getDefaultWorkFile(".ply.header")
    val fs = path.getFileSystem(context.getConfiguration)
    val dos = new java.io.DataOutputStream(fs.create(path))
    val header = new PlyHeader(path.toString, littleEndian, Map(element -> ((count, schema))))
    header.write(dos)
    dos.close
  }
} 
Example 14
Source File: LasOutputWriter.scala    From spark-iqmulus   with Apache License 2.0
package fr.ign.spark.iqmulus.las

import org.apache.spark.sql.types._
import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext }
import java.io.DataOutputStream
import org.apache.spark.sql.sources.OutputWriter
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.hadoop.io.{ NullWritable, BytesWritable }
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.fs.Path
import java.text.NumberFormat
import org.apache.spark.sql.{ Row, SQLContext, sources }
import fr.ign.spark.iqmulus.RowOutputStream

class LasOutputWriter(
  name: String,
  context: TaskAttemptContext,
  dataSchema: StructType,
  formatOpt: Option[Byte] = None,
  version: Version = Version(),
  offset: Array[Double] = Array(0F, 0F, 0F),
  scale: Array[Double] = Array(0.01F, 0.01F, 0.01F)
)
    extends OutputWriter {

  private val file = {
    val path = getDefaultWorkFile("/1.pdr")
    val fs = path.getFileSystem(context.getConfiguration)
    fs.create(path)
  }

  private val pmin = Array.fill[Double](3)(Double.PositiveInfinity)
  private val pmax = Array.fill[Double](3)(Double.NegativeInfinity)
  private val countByReturn = Array.fill[Long](15)(0)
  private def count = countByReturn.sum

  private val format = formatOpt.getOrElse(LasHeader.formatFromSchema(dataSchema))

  // todo, extra bytes
  private val schema = LasHeader.schema(format)
  private def header =
    new LasHeader(name, format, count, pmin, pmax, scale, offset, countByReturn)

  private val recordWriter = new RowOutputStream(new DataOutputStream(file), littleEndian = true, schema, dataSchema)

  def getDefaultWorkFile(extension: String): Path = {
    val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID")
    val taskAttemptId: TaskAttemptID = context.getTaskAttemptID
    val split = taskAttemptId.getTaskID.getId
    new Path(name, f"$split%05d-$uniqueWriteJobId$extension")
  }

  override def write(row: Row): Unit = {
    recordWriter.write(row)

    // gather statistics for the header
    val x = offset(0) + scale(0) * row.getAs[Int]("x").toDouble
    val y = offset(1) + scale(1) * row.getAs[Int]("y").toDouble
    val z = offset(2) + scale(2) * row.getAs[Int]("z").toDouble
    val ret = row.getAs[Byte]("flags") & 0x3
    countByReturn(ret) += 1
    pmin(0) = Math.min(pmin(0), x)
    pmin(1) = Math.min(pmin(1), y)
    pmin(2) = Math.min(pmin(2), z)
    pmax(0) = Math.max(pmax(0), x)
    pmax(1) = Math.max(pmax(1), y)
    pmax(2) = Math.max(pmax(2), z)
  }

  override def close(): Unit = {
    recordWriter.close

    // write header
    val path = getDefaultWorkFile("/0.header")
    val fs = path.getFileSystem(context.getConfiguration)
    val dos = new java.io.DataOutputStream(fs.create(path))
    header.write(dos)
    dos.close

    // copy header and pdf to a final las file (1 per split)
    org.apache.hadoop.fs.FileUtil.copyMerge(
      fs, getDefaultWorkFile("/"),
      fs, getDefaultWorkFile(".las"),
      true, context.getConfiguration, ""
    )
  }
} 
Example 15
Source File: TypeCast.scala    From spark-google-spreadsheets   with Apache License 2.0
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
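
castTo above is private to the connector, so the sketch below isolates just its numeric fallback: try the fast toDouble parse first, then fall back to a locale-aware NumberFormat parse. (Note that the DoubleType branch in the example tries datum.toFloat first, so its fast path goes through single precision before widening.)

import java.text.NumberFormat
import java.util.Locale
import scala.util.Try

object LocaleAwareParse {
  // Hypothetical helper, not part of the connector's API.
  def parseDouble(datum: String, locale: Locale = Locale.getDefault()): Double =
    Try(datum.toDouble).getOrElse(NumberFormat.getInstance(locale).parse(datum).doubleValue())
}

// LocaleAwareParse.parseDouble("1234.5")                  == 1234.5
// LocaleAwareParse.parseDouble("1,234.5", Locale.US)      == 1234.5
// LocaleAwareParse.parseDouble("1.234,5", Locale.GERMANY) == 1234.5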
Example 16
Source File: Bencharts.scala    From rtree2d   with Apache License 2.0
import java.awt.{Color, Paint}
import java.text.NumberFormat

import javax.imageio.ImageIO
import org.jfree.chart.JFreeChart
import org.jfree.chart.axis.LogarithmicAxis
import org.jfree.chart.plot.{DefaultDrawingSupplier, XYPlot}
import org.jfree.chart.renderer.xy.XYErrorRenderer
import org.jfree.data.xy.{YIntervalSeries, YIntervalSeriesCollection}
import sbt._
import com.github.plokhotnyuk.jsoniter_scala.macros._
import com.github.plokhotnyuk.jsoniter_scala.core._
import com.github.plokhotnyuk.jsoniter_scala.macros.JsonCodecMaker._

import scala.collection.SortedMap


object Bencharts {
  // Renders JMH benchmark results (read from the jmhReport JSON file) as charts with
  // logarithmic axes, one PNG per benchmark, written under targetDir.
  def apply(jmhReport: File, yAxisTitle: String, targetDir: File): Unit = {
    val allResults = readFromArray(IO.readBytes(jmhReport))(make[Seq[BenchmarkResult]](CodecMakerConfig))
    val constParams = allResults.flatMap(_.params.toSeq).groupBy(_._1).collect {
      case (_, kvs) if kvs.distinct.size == 1 => kvs.head
    }.toSeq
    allResults.groupBy(benchmarkName(constParams)).foreach { case (benchmark, results) =>
      val dataset = new YIntervalSeriesCollection {
        SortedMap(results.groupBy(otherParams(constParams)).toSeq:_*).foreach { case (params, iterations) =>
          addSeries(new YIntervalSeries(params) {
            iterations.foreach { iteration =>
              val x = iteration.params.get("size").fold(0.0)(_.toDouble)
              val y = Math.max(iteration.primaryMetric.score, 1.0)
              val yLow = Math.max(iteration.primaryMetric.scoreConfidence._1, 1.0)
              val yHigh = Math.max(iteration.primaryMetric.scoreConfidence._2, 1.0)
              add(x, y, yLow, yHigh)
            }
          })
        }
      }
      val renderer = new XYErrorRenderer {
        (0 to dataset.getSeriesCount).foreach(i => setSeriesLinesVisible(i, true))
      }
      val plot = new XYPlot(dataset, axis("Size"), axis(yAxisTitle), renderer) {
        setDrawingSupplier(new DefaultDrawingSupplier {
          override def getNextPaint: Paint = super.getNextPaint match {
            case x: Color if x.getRed > 200 && x.getGreen > 200 =>
              new Color(x.getRed, (x.getGreen * 0.8).toInt, x.getBlue, x.getAlpha)
            case x => x
          }
        })
      }
      val chart = new JFreeChart(benchmark, JFreeChart.DEFAULT_TITLE_FONT, plot, true)
      ImageIO.write(chart.createBufferedImage(1200, 900), "png", targetDir / s"$benchmark.png")
    }
  }

  private def axis(title: String): LogarithmicAxis = new LogarithmicAxis(title) {
    setAllowNegativesFlag(true)
    setNumberFormatOverride(NumberFormat.getInstance())
  }

  private def benchmarkName(constParams: Seq[(String, String)])(result: BenchmarkResult): String = {
    val benchName = result.benchmark.split("""\.""").last
    constParams.map { case (k, v) =>
      s"$k=$v"
    }.sorted.mkString(s"$benchName[", ",", "]")
  }

  private def otherParams(constParams: Seq[(String, String)])(result: BenchmarkResult): String = {
    val constParamNames = constParams.map(_._1).toSet
    val benchSuitName = result.benchmark.split("""\.""").reverse.tail.head
    result.params.filterKeys(k => k != "size" && !constParamNames(k)).map { case (k, v) =>
      s"$k=$v"
    }.toSeq.sorted.mkString(s"$benchSuitName[", ",", "]")
  }
}

case class BenchmarkMetric(score: Double, scoreConfidence: (Double, Double))

case class BenchmarkResult(benchmark: String, params: Map[String, String], primaryMetric: BenchmarkMetric)
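
The only NumberFormat use in this last example is setNumberFormatOverride, which makes the logarithmic axes render their tick labels with a plain locale-aware NumberFormat instead of JFreeChart's default. A hypothetical variant for a linear axis with integer, grouped tick labels might look like this (NumberAxis exposes the same setter):

import java.text.NumberFormat
import org.jfree.chart.axis.NumberAxis

object AxisFormatting {
  // Hypothetical helper: integer tick labels with grouping separators on a linear axis.
  def axisWithGroupedTicks(title: String): NumberAxis = {
    val axis = new NumberAxis(title)
    axis.setNumberFormatOverride(NumberFormat.getIntegerInstance())
    axis
  }
}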