scala.io.Codec Scala Examples

The following examples show how to use scala.io.Codec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

Example 1

Source File: constants.scala From Converter with GNU General Public License v3.0

5 votes

package org.scalablytyped.converter.internal

import java.net.URI
import java.time.ZoneId
import java.time.format.DateTimeFormatter
import java.util.Locale

import org.scalablytyped.converter.internal.environment.OpSystem

import scala.io.Codec

/** Shared constants: default folder locations, repository URIs and date/time formatting settings. */
object constants {

  /** Per-OS default cache directory, rooted at the user's home folder. */
  val defaultCacheFolder: os.Path = {
    val home = os.home
    environment.OS match {
      case OpSystem.MAC     => home / "Library" / "Caches" / "ScalablyTyped"
      case OpSystem.WINDOWS => home / "AppData" / "Local" / "ScalablyTyped"
      // An unrecognised OS falls back to the Linux cache folder.
      case OpSystem.LINUX | OpSystem.UNKNOWN => home / ".cache" / "scalablytyped"
    }
  }

  /** Default local publish folder: `~/.ivy2/local`. */
  val defaultLocalPublishFolder: os.Path = os.home / ".ivy2" / "local"

  val DefinitelyTypedRepo = new URI("https://github.com/DefinitelyTyped/DefinitelyTyped.git")
  val ConverterRepo       = new URI("https://github.com/ScalablyTyped/Converter.git")

  /** True when the `CIRCLECI` environment variable is set (to any value). */
  val isCi = sys.env.contains("CIRCLECI")

  val TimeZone = ZoneId.of("UTC")
  val Utf8     = Codec.UTF8.charSet

  // NOTE(review): `hh` is the 12-hour clock field and the pattern carries no AM/PM marker,
  // so e.g. 01:05 and 13:05 format identically. Confirm whether `HH` was intended before
  // changing it, since that would alter every formatted timestamp.
  val DateTimePattern =
    DateTimeFormatter.ofPattern("yyyyMMddhhmm").withLocale(Locale.ENGLISH).withZone(TimeZone)
}

Example 2

Source File: JsonRequestSpec.scala From play-ws with Apache License 2.0

5 votes

package play.api.libs.ws.ahc

import java.nio.charset.StandardCharsets

import akka.actor.ActorSystem
import akka.stream.Materializer
import akka.util.ByteString
import org.mockito.Mockito.times
import org.mockito.Mockito.verify
import org.mockito.Mockito.when
import org.specs2.mock.Mockito

import org.specs2.mutable.Specification
import org.specs2.specification.AfterAll
import play.api.libs.json.JsString
import play.api.libs.json.JsValue
import play.api.libs.json.Json
import play.api.libs.ws.JsonBodyReadables
import play.api.libs.ws.JsonBodyWritables
import play.libs.ws.DefaultObjectMapper
import play.shaded.ahc.org.asynchttpclient.Response

import scala.io.Codec


/**
 * Specs for JSON request bodies and JSON response reading on the standalone AHC WS client.
 *
 * The request tests build a real `StandaloneAhcWSRequest` against a mocked client and inspect
 * the resulting content type and bytes. The response tests mock the underlying AHC `Response`
 * and check which charset `JsonBodyReadables.readableAsJson` asks it for.
 */
class JsonRequestSpec extends Specification with Mockito with AfterAll with JsonBodyWritables {
  sequential

  implicit val system       = ActorSystem()
  implicit val materializer = Materializer.matFromSystem

  // Starts actor-system shutdown after all examples; the returned Future is not awaited.
  override def afterAll: Unit = {
    system.terminate()
  }

  "set a json node" in {
    val jsValue = Json.obj("k1" -> JsString("v1"))
    val client  = mock[StandaloneAhcWSClient]
    val req = new StandaloneAhcWSRequest(client, "http://playframework.com/", null)
      .withBody(jsValue)
      .asInstanceOf[StandaloneAhcWSRequest]
      .buildRequest()

    // A JsValue body must set the JSON content type and serialise compactly.
    req.getHeaders.get("Content-Type") must be_==("application/json")
    ByteString.fromArray(req.getByteData).utf8String must be_==("""{"k1":"v1"}""")
  }

  "set a json node using the default object mapper" in {
    val objectMapper = DefaultObjectMapper.instance

    // Implicit body writable built from the object mapper, picked up by `withBody(jsonNode)`.
    implicit val jsonReadable = body(objectMapper)
    val jsonNode              = objectMapper.readTree("""{"k1":"v1"}""")
    val client                = mock[StandaloneAhcWSClient]
    val req = new StandaloneAhcWSRequest(client, "http://playframework.com/", null)
      .withBody(jsonNode)
      .asInstanceOf[StandaloneAhcWSRequest]
      .buildRequest()

    req.getHeaders.get("Content-Type") must be_==("application/json")
    ByteString.fromArray(req.getByteData).utf8String must be_==("""{"k1":"v1"}""")
  }

  "read an encoding of UTF-8" in {
    // NOTE(review): this "UTF-8" example loads the fixture with Codec.ISO8859, exactly like the
    // ISO-8859-1 example below. The string is only fed back through the mock, so the test still
    // exercises charset selection - confirm whether Codec.UTF8 was intended here.
    val json = io.Source.fromResource("test.json")(Codec.ISO8859).getLines.mkString

    val ahcResponse = mock[Response]
    val response    = new StandaloneAhcWSResponse(ahcResponse)

    // No charset in the content type: the body is expected to be requested as UTF-8.
    when(ahcResponse.getResponseBody(StandardCharsets.UTF_8)).thenReturn(json)
    when(ahcResponse.getContentType).thenReturn("application/json")

    val value: JsValue = JsonBodyReadables.readableAsJson.transform(response)
    verify(ahcResponse, times(1)).getResponseBody(StandardCharsets.UTF_8)
    verify(ahcResponse, times(1)).getContentType
    value.toString must beEqualTo(json)
  }

  "read an encoding of ISO-8859-1" in {
    val json = io.Source.fromResource("test.json")(Codec.ISO8859).getLines.mkString

    val ahcResponse = mock[Response]
    val response    = new StandaloneAhcWSResponse(ahcResponse)

    // Explicit charset in the content type: the body is expected to be requested as ISO-8859-1.
    when(ahcResponse.getResponseBody(StandardCharsets.ISO_8859_1)).thenReturn(json)
    when(ahcResponse.getContentType).thenReturn("application/json;charset=iso-8859-1")

    val value: JsValue = JsonBodyReadables.readableAsJson.transform(response)
    verify(ahcResponse, times(1)).getResponseBody(StandardCharsets.ISO_8859_1)
    verify(ahcResponse, times(1)).getContentType
    value.toString must beEqualTo(json)
  }
}

Example 3

Source File: PrintUtils.scala From random-projections-at-berlinbuzzwords with Apache License 2.0

5 votes

package com.stefansavev

import java.io.PrintWriter

import scala.io.{Codec, Source}

/** Small helpers for writing text files through a `PrintWriter`. */
object PrintUtils {

  /**
   * Writes each element of `v` on its own line to `fileName`.
   *
   * The file is opened with the platform default charset.
   */
  def columnVectorToFile(fileName: String, v: Array[Double]): Unit =
    writeAndClose(new PrintWriter(fileName)) { writer =>
      v.foreach(a => writer.println(a.toString))
    }

  /**
   * Writes each string of `v` on its own line to `fileName`.
   *
   * NOTE(review): this uses the platform default charset, whereas `withPrintWriter` uses UTF-8.
   * Left unchanged to stay backward compatible; confirm whether UTF-8 is wanted here as well.
   */
  def stringsToFile(fileName: String, v: Array[String]): Unit =
    writeAndClose(new PrintWriter(fileName)) { writer =>
      v.foreach(a => writer.println(a))
    }

  /**
   * Opens `fileName` as a UTF-8 `PrintWriter`, passes it to `body`, and closes it afterwards.
   *
   * The writer is closed even when `body` throws; the exception is then rethrown.
   */
  def withPrintWriter(fileName: String, body: PrintWriter => Unit): Unit =
    writeAndClose(new PrintWriter(fileName, "UTF-8"))(body)

  /** Runs `body` against `writer` and always closes the writer, so the file handle cannot leak. */
  private def writeAndClose(writer: PrintWriter)(body: PrintWriter => Unit): Unit =
    try body(writer)
    finally writer.close()

}

/** Helpers for reading text files line by line. */
object FileReadUtils {

  /**
   * Opens `fileName` as UTF-8, hands its line iterator to `body`, and returns `body`'s result.
   *
   * The underlying source is closed before this method returns, including when `body` throws.
   * `body` must therefore consume everything it needs from the iterator; an iterator (or a
   * lazy view over it) returned as the result would read from a closed source.
   *
   * @param fileName path of the file to read
   * @param body     computation over the file's lines
   * @return whatever `body` returns
   */
  def withLinesIterator[T](fileName: String)(body: Iterator[String] => T): T = {
    val source = Source.fromFile(fileName)(Codec.UTF8)
    try body(source.getLines())
    finally source.close()
  }
}

Example 4

Source File: CsvSinkTests.scala From kantan.csv with Apache License 2.0

5 votes

package kantan.csv

import java.io._
import laws.discipline.arbitrary._
import ops._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks
import scala.io.Codec

/** Property tests checking that CSV rows written to streams and writers round-trip to `asCsv`. */
class CsvSinkTests extends AnyFunSuite with ScalaCheckPropertyChecks with Matchers {

  // Everything written to the stream so far, decoded as UTF-8 text.
  private def utf8Text(stream: ByteArrayOutputStream): String =
    new String(stream.toByteArray, Codec.UTF8.charSet)

  test("CSV data should be correctly written to an output stream (bit by bit)") {
    forAll(csv) { rows =>
      val stream = new ByteArrayOutputStream()

      // Write one row at a time, threading the writer through the fold, then close it.
      val writer = rows.foldLeft(stream.asCsvWriter[List[String]](rfc))((w, row) => w.write(row))
      writer.close()

      utf8Text(stream) should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to an output stream (in bulk)") {
    forAll(csv) { rows =>
      val stream = new ByteArrayOutputStream()

      stream.writeCsv(rows, rfc)

      utf8Text(stream) should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to a writer (bit by bit)") {
    forAll(csv) { rows =>
      val target = new StringWriter()

      val writer = rows.foldLeft(target.asCsvWriter[List[String]](rfc))((w, row) => w.write(row))
      writer.close()

      target.toString should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to a writer (in bulk)") {
    forAll(csv) { rows =>
      val target = new StringWriter()

      target.writeCsv(rows, rfc)

      target.toString should be(rows.asCsv(rfc))
    }
  }
}

Example 5

Source File: Report.scala From AppCrawler with Apache License 2.0

5 votes

package com.testerhome.appcrawler

import org.apache.commons.io.FileUtils
import org.scalatest.tools.Runner

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.{Source, Codec}
import scala.reflect.io.File
import collection.JavaConversions._


    log.info(s"run ${cmdArgs.mkString(" ")}")
    Runner.run(cmdArgs)
    changeTitle()
  }

  /**
   * Rewrites `index.html` under `reportPath`, replacing the stock "ScalaTest Results" text
   * with `title`.
   *
   * The file is read completely and its handle closed before the new content is written back.
   *
   * @param title replacement text; defaults to `Report.title`
   */
  def changeTitle(title:String=Report.title): Unit ={
    val originTitle="ScalaTest Results"
    val indexFile=reportPath+"/index.html"
    val source=Source.fromFile(indexFile)
    // Close the source even if reading fails, so the file handle is not leaked.
    val newContent=
      try source.mkString.replace(originTitle, title)
      finally source.close()
    scala.reflect.io.File(indexFile).writeAll(newContent)
  }

}

/** Singleton `Report` instance that also holds mutable, process-wide report settings. */
object Report extends Report{
  var showCancel=false
  var title="AppCrawler"
  var master=""
  var candidate=""
  var reportDir=""
  var store=new URIElementStore


  /**
   * Reads `elementsFile` and deserialises its YAML content into a `URIElementStore`.
   *
   * The file handle is closed before returning, including when reading or parsing throws.
   *
   * @param elementsFile path of the YAML file to load
   */
  def loadResult(elementsFile: String): URIElementStore ={
    val source=Source.fromFile(elementsFile)
    try DataObject.fromYaml[URIElementStore](source.mkString)
    finally source.close()
  }
}

Example 6

Source File: HeaderCreator.scala From sbt-header with Apache License 2.0

5 votes

package de.heikoseeberger.sbtheader

import java.io.InputStream
import sbt.Logger
import scala.io.Codec

/** Factory for [[HeaderCreator]], whose constructor is private. */
object HeaderCreator {

  /** Builds a `HeaderCreator` from the given settings and the file content stream. */
  def apply(
      fileType: FileType,
      commentStyle: CommentStyle,
      license: License,
      headerEmptyLine: Boolean,
      log: Logger,
      input: InputStream
  ): HeaderCreator =
    new HeaderCreator(
      fileType = fileType,
      commentStyle = commentStyle,
      license = license,
      headerEmptyLine = headerEmptyLine,
      log = log,
      input = input
    )
}

/**
 * Reads a file's content from `input` (as UTF-8, closing the stream afterwards) and computes
 * the text the file should have once its license header is created or refreshed.
 *
 * All work happens at construction time; `createText` just exposes the result.
 */
final class HeaderCreator private (
    fileType: FileType,
    commentStyle: CommentStyle,
    license: License,
    headerEmptyLine: Boolean,
    log: Logger,
    input: InputStream
) {

  // Whole-string matchers that succeed when the text contains a "\r\n" / "\r" somewhere.
  private val crlf          = """(?s)(?:.*)(\r\n)(?:.*)""".r
  private val cr            = """(?s)(?:.*)(\r)(?:.*)""".r
  private val headerPattern = commentStyle.pattern

  // Splits off a leading "first line" when the file type defines a pattern for one and the
  // content matches it; otherwise the first line is empty and the text is the whole content.
  private val (firstLine, text) = {
    val fileContent =
      try scala.io.Source.fromInputStream(input)(Codec.UTF8).mkString
      finally input.close()
    fileType.firstLinePattern
      .map { pattern =>
        fileContent match {
          case pattern(first, rest) => (first, rest)
          case other                => ("", other)
        }
      }
      .getOrElse(("", fileContent))
  }

  log.debug(s"First line of file is:$newLine$firstLine")
  log.debug(s"Text of file is:$newLine$text")

  // Line ending used by the file's text.
  private val fileNewLine = lineEndingOf(text)

  // Picks the line ending found in `s`: CRLF first, then lone CR, falling back to LF.
  private def lineEndingOf(s: String): String =
    s match {
      case crlf(_) => "\r\n"
      case cr(_)   => "\r"
      case _       => "\n"
    }

  // Renders the header via the comment style and converts its line endings to the file's own.
  private def newHeaderText(existingHeader: Option[String]) = {
    val suffix     = if (headerEmptyLine) "" else newLine
    val headerText = commentStyle(license, existingHeader).stripSuffix(suffix)
    headerText.replace(lineEndingOf(headerText), fileNewLine)
  }

  // Drops leading whitespace from the body that follows the header.
  private def trimLeft(body: String): String = body.replaceAll("""^\s+""", "")

  // None when nothing has to change: the header is already up to date, or the text is empty.
  private val modifiedText =
    text match {
      case headerPattern(existingText, body) =>
        val refreshed = newHeaderText(Some(existingText))
        if (refreshed == existingText) None
        else Some(firstLine + refreshed + trimLeft(body))
      case body if body.isEmpty => None
      case body =>
        Some(firstLine + newHeaderText(None) + trimLeft(body))
    }
  log.debug(s"Modified text of file is:$newLine$modifiedText")

  /** The updated file text, or `None` when the file needs no change. */
  def createText: Option[String] =
    modifiedText
}

Example 7

Source File: ProcessBuilderUtils.scala From scalastringcourseday7 with Apache License 2.0

5 votes

package util

import java.io.ByteArrayInputStream
import java.nio.charset.{Charset, CodingErrorAction}

import text.StringOption

import scala.collection.mutable.ListBuffer
import scala.io.{Codec, Source}
import scala.sys.process.ProcessBuilder


/** Implicit enrichment that adds the `ProcessBuilderUtils` methods to any `ProcessBuilder`. */
object ProcessBuilderUtils {
  implicit def processToProcessUtils(repr: ProcessBuilder): ProcessBuilderUtils =
    new ProcessBuilderUtils(repr)
}

/** Wrapper around a `ProcessBuilder` that re-decodes the process output with a chosen charset. */
class ProcessBuilderUtils(repr: ProcessBuilder) {

  /**
   * Runs the process, collects all of its output lines, and decodes them again with `encoding`.
   *
   * Each line is trimmed, terminated with "\n" and turned back into bytes with the platform
   * default charset; the bytes are then decoded using a codec configured with the given
   * error actions and, when present, the replacement string.
   */
  def lineStream(encoding: Charset,
                 onMalformedInput: CodingErrorAction,
                 onUnmappableCharacter: CodingErrorAction,
                 replacementOpt: StringOption): Iterator[String] = {
    // The whole output is consumed eagerly before any decoding starts.
    val rawBytes: Array[Byte] =
      repr.lineStream_!.iterator
        .flatMap(line => (line.trim concat "\n").getBytes.iterator)
        .toArray

    // Codec's configuration methods mutate the instance, so the later call needs no reassignment.
    implicit val codec = Codec(encoding)
      .onMalformedInput(onMalformedInput)
      .onUnmappableCharacter(onUnmappableCharacter)
    if (replacementOpt.nonEmpty) {
      codec.decodingReplaceWith(replacementOpt.get)
    }

    Source.fromInputStream(new ByteArrayInputStream(rawBytes)).getLines
  }
}

Example 8

Source File: WeightedLevenshtein.scala From spark-nlp with Apache License 2.0

5 votes

package com.johnsnowlabs.nlp.annotators.spell.context

import com.github.liblevenshtein.transducer.{Candidate, ITransducer}
import com.johnsnowlabs.nlp.annotators.spell.context.parser.RegexParser

import scala.collection.mutable
import scala.io.Codec
import scala.math.min

/**
 * Weighted Levenshtein distance plus helpers to load a weight table and to recover the edit
 * operations between two strings.
 */
trait WeightedLevenshtein {

  /**
   * Edit distance between `s11` and `s22` where every edit is priced by `cost`.
   *
   * Both strings are wrapped as `^...._` before comparison. Insertions are priced on
   * two-character windows and use "Ɛ" as the marker for the empty side.
   *
   * @param cost price of turning the first argument into the second
   */
  def levenshteinDist(s11: String, s22: String)(cost:(String, String) => Float): Float = {

    // cope with start of string
    val s1 = s"^${s11}_"
    val s2 = s"^${s22}_"

    // Same strings with one extra leading char, so a two-char window exists at every index.
    val s1_ = s"_^${s11}_"
    val s2_ = s"_^${s22}_"

    // First row/column hold the cost of building a prefix from nothing (1.0 per char).
    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) => if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) = if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
      else {
        minimum(dist(j - 1)(i) + cost(s2_.substring(j - 1, j + 1), s"${s1(i - 1)}Ɛ"),   //insert in s1
          dist(j)(i - 1) + cost(s"${s2(j - 1)}Ɛ", s1_.substring(i - 1, i + 1)),         //insert in s2
          dist(j - 1)(i - 1) + cost(s2(j - 1).toString, s1(i - 1).toString))
      }

    dist(s2.length)(s1.length)
  }


  /** `levenshteinDist` priced by the given weight table (see `genCost` for the fallbacks). */
  def wLevenshteinDist(s1:String, s2:String, weights:Map[String, Map[String, Float]]): Float = levenshteinDist(s1, s2)(genCost(weights))

  /**
   * Loads a weight table from a UTF-8 file whose lines look like `from|to|weight`.
   *
   * The file is closed before returning, also on failure, and the result is a strict immutable
   * map (the previous `mapValues` result was a lazy view). A line with fewer than three fields
   * or a non-numeric weight still fails with the underlying exception.
   */
  def loadWeights(filename: String): Map[String, Map[String, Float]] = {
    // store word ids
    val vocabIdxs = mutable.HashMap[String, mutable.Map[String, Float]]()

    implicit val codec: Codec = Codec.UTF8

    val source = scala.io.Source.fromFile(filename)
    try {
      source.getLines().foreach { line =>
        val lineFields = line.split("\\|")
        vocabIdxs
          .getOrElseUpdate(lineFields(0), mutable.Map[String, Float]())
          .update(lineFields(1), lineFields(2).toFloat)
      }
    } finally source.close()

    vocabIdxs.iterator.map { case (from, targets) => from -> targets.toMap }.toMap
  }


  // Table weight when present; otherwise 0 for identical strings and 1 for everything else.
  private def genCost(weights: Map[String, Map[String, Float]])(a:String, b:String): Float = {
    if (weights.contains(a) && weights(a).contains(b))
      weights(a)(b)
    else if (a == b) {
      0.0f
    }
    else
      1.0f
  }

  private def minimum(i1: Float, i2: Float, i3: Float) = min(min(i1, i2), i3)


  /**
   * Computes a unit-cost edit matrix for `s1` and `s2` and returns the edit operations found
   * by walking it back from the bottom-right corner.
   *
   * Each pair is `(piece of s1, piece of s2)`; an empty string on one side marks an
   * insertion or deletion.
   */
  def learnDist(s1: String, s2: String): Seq[(String, String)] = {
    val acc: Seq[(String, String)] = Seq.empty
    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) => if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) = if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
      else minimum(
        dist(j - 1)(i) + 1.0f,
        dist(j)(i - 1) + 1.0f,
        dist(j - 1)(i - 1) + 1.0f)

    backTrack(dist, s2, s1, s2.length, s1.length, acc)
  }

  /**
   * Walks `dist` back from cell `(j, i)` towards the origin, appending one pair to `acc` for
   * every non-matching step.
   *
   * Reaching row 0 or column 0 used to index `s2(-1)` / `s1(-1)` and throw; the remaining
   * characters are now emitted as pure deletions / insertions. Inputs that already worked
   * never reached those cells, so their results are unchanged.
   */
  def backTrack(dist: Array[Array[Float]], s2:String, s1:String,
                j:Int, i:Int, acc:Seq[(String, String)]): Seq[(String, String)]= {

    if (j == 0 && i == 0)
      acc
    else if (j == 0)
      // s2 is exhausted: what is left of s1 has no counterpart.
      backTrack(dist, s2, s1, j, i - 1, acc :+ ((s1(i - 1).toString, "")))
    else if (i == 0)
      // s1 is exhausted: what is left of s2 has no counterpart.
      backTrack(dist, s2, s1, j - 1, i, acc :+ (("", s2(j - 1).toString)))
    else if (s2(j-1) == s1(i-1)) {
      if (j == 1 && i == 1)
        acc
      else
        backTrack(dist, s2, s1, j - 1, i - 1, acc)
    }
    else {
      // Candidate steps keyed by their distance; equal distances collapse to the later entry.
      val pSteps = Map(dist(j - 1)(i) -> ("", s2(j - 1).toString, j - 1, i),
        dist(j)(i - 1) -> (s1(i - 1).toString, "", j, i - 1),
        dist(j - 1)(i - 1) -> (s1(i - 1).toString, s2(j - 1).toString, j - 1, i - 1))

      val best = pSteps.minBy(_._1)._2
      backTrack(dist, s2, s1, best._3, best._4, acc :+ ((best._1, best._2)))
    }
  }

}

Example 9

Source File: InvokeMigrationHandler.scala From flyway-awslambda with MIT License

5 votes

package crossroad0201.aws.flywaylambda

import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter}

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client}

import scala.io.{BufferedSource, Codec}
import scala.util.{Failure, Success, Try}

/**
 * Lambda stream handler that reads a JSON request naming an S3 bucket, prefix and optional
 * Flyway configuration file, runs `migrate`, and writes the result (or the error) to `output`.
 */
class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase {
  type BucketName = String
  type Prefix = String
  type ConfFileName = String

  /**
   * Handles one invocation.
   *
   * Expected input is a JSON object with string fields `bucket_name` and `prefix`, plus an
   * optional string field `flyway_conf` (defaulting to "flyway.conf"). On success the result
   * is logged and written to `output` as UTF-8; on failure the stack trace is printed and the
   * exception's `toString` is written to `output`.
   */
  override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = {
    // Parses the request body; any parsing problem ends up as a Failure.
    def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try {
      import spray.json._
      import DefaultJsonProtocol._

      val json = new BufferedSource(input)(Codec("UTF-8")).mkString
      val jsObj = JsonParser(json).toJson.asJsObject
      jsObj.getFields(
        "bucket_name",
        "prefix"
      ) match {
        case Seq(JsString(b), JsString(p)) => {
          jsObj.getFields(
            "flyway_conf"
          ) match {
            case Seq(JsString(c)) => (b, p, c)
            case _ => (b, p, "flyway.conf")
          }
        }
        // NOTE(review): the message says "bucketName" while the key actually read is "bucket_name".
        case _ => throw new IllegalArgumentException(s"Missing require key [bucketName, prefix]. - $json")
      }
    }

    val logger = context.getLogger

    implicit val s3Client: AmazonS3 = new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION"))))

    (for {
      i <- parseInput
      _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") }
      r <- migrate(i._1, i._2, i._3)(context, s3Client)
    } yield r) match {
      case Success(r) =>
        logger.log(r)
        val b = r.getBytes("UTF-8")
        val bout = new BufferedOutputStream(output)
        // Write the payload exactly once. The previous `Stream.continually(bout.write(b))`
        // produced the same single write only because a Stream evaluates its head eagerly.
        bout.write(b)
        bout.flush()
      case Failure(e) =>
        e.printStackTrace()
        // NOTE(review): this writer uses the platform default charset, unlike the UTF-8 success path.
        val w = new PrintWriter(output)
        w.write(e.toString)
        w.flush()
    }
  }

}

Example 10

Source File: IngestSpec.scala From CSYE7200_Old with MIT License

5 votes

package edu.neu.coe.csye7200.asstmd

import org.scalatest.{FlatSpec, Matchers}

import scala.io.{Codec, Source}
import scala.util._


/** Exercises `Ingest` on a tiny in-memory source and on the movie metadata resource. */
class IngestSpec extends FlatSpec with Matchers {

  behavior of "ingest"

  it should "work for Int" in {
    trait IngestibleInt extends Ingestible[Int] {
      def fromString(w: String): Try[Int] = Try(w.toInt)
    }
    implicit object IngestibleInt extends IngestibleInt
    // Two lines of input: "x" followed by "42".
    val source = Source.fromChars("x\n42".toCharArray)
    val ingester = new Ingest[Int]()
    val xys = ingester(source).toSeq
    // TO BE IMPLEMENTED check that xys has exactly one element, consisting of Success(42) -- 10 points
  }

  it should "work for movie database" in {
    implicit val codec: Codec = Codec("UTF-8")
    // NOTE that you expect to see a number of exceptions thrown. That's OK. We expect that some lines will not parse correctly.
    Try(Source.fromResource("movie_metadata.csv")) match {
      case Failure(x) =>
        fail(x)
      case Success(source) =>
        val ingester = new Ingest[Movie]()
        // Report every parse failure on stderr while keeping the original Try unchanged.
        val mys: Seq[Try[Movie]] = ingester(source).map { my =>
          my.transform(m => Success(m), { e => System.err.println(e); my })
        }.toSeq
        // Keep only the successfully parsed movies produced in New Zealand.
        val ms = mys.flatMap(_.toOption.filter(_.production.country == "New Zealand"))
        ms.size shouldBe 4
        ms.foreach(m => println(m))
        source.close()
    }
  }

}

Example 11

Source File: IngestSpec.scala From CSYE7200 with MIT License

5 votes

package edu.neu.coe.csye7200.asstmd

import org.scalatest.{FlatSpec, Matchers}

import scala.io.{Codec, Source}
import scala.util._


/** Exercises `Ingest` on a tiny in-memory source and on the movie metadata resource. */
class IngestSpec extends FlatSpec with Matchers {

  behavior of "ingest"

  it should "work for Int" in {
    trait IngestibleInt extends Ingestible[Int] {
      def fromString(w: String): Try[Int] = Try(w.toInt)
    }
    implicit object IngestibleInt extends IngestibleInt
    // Two lines of input: "x" followed by "42".
    val source = Source.fromChars("x\n42".toCharArray)
    val ingester = new Ingest[Int]()
    val xys = ingester(source).toSeq
    // check that xys has exactly one element, consisting of Success(42) -- 10 points
    // TO BE IMPLEMENTED
    ???
  }

  it should "work for movie database" in {
    implicit val codec: Codec = Codec("UTF-8")
    // NOTE that you expect to see a number of exceptions thrown. That's OK. We expect that some lines will not parse correctly.
    Try(Source.fromResource("movie_metadata.csv")) match {
      case Failure(x) =>
        fail(x)
      case Success(source) =>
        val ingester = new Ingest[Movie]()
        // Report every parse failure on stderr while keeping the original Try unchanged.
        val mys: Seq[Try[Movie]] = ingester(source).map { my =>
          my.transform(m => Success(m), { e => System.err.println(e); my })
        }.toSeq
        // Keep only the successfully parsed movies produced in New Zealand.
        val ms = mys.flatMap(_.toOption.filter(_.production.country == "New Zealand"))
        ms.size shouldBe 4
        ms.foreach(m => println(m))
        source.close()
    }
  }

}

Example 12

Source File: ModelConfigurationParser.scala From modelmatrix with Apache License 2.0

5 votes

package com.collective.modelmatrix

import java.nio.charset.CodingErrorAction
import java.security.MessageDigest
import java.util.function.BiConsumer

import com.typesafe.config.{Config, ConfigValue}

import scala.io.Codec
import scalaz.{Failure, Success, ValidationNel}

/**
 * Parses feature definitions found under `path` in a Typesafe `Config`, and exposes the raw
 * text of the configuration's origin (lines, content, checksum) when it was loaded from a URL.
 *
 * @param config parsed configuration
 * @param path   key of the config object whose entries are the feature definitions
 */
class ModelConfigurationParser(config: Config, path: String = "features") {

  type FeatureDefinition = (String, ValidationNel[String, ModelFeature])

  // Content lines paired with their zero-based position in the origin file.
  private lazy val configLines: Seq[(String, Int)] = contentLines.zipWithIndex

  // Try to find feature row index in original config if possible
  private def featureIndex(f: String): Int = {
    configLines.find(_._1.contains(f)).map(_._2).getOrElse(0)
  }

  private[this] val originUrl = config.origin().url()

  /**
   * The configuration's origin as lines, or empty when the config has no origin URL.
   *
   * The text is decoded as UTF-8 with malformed and unmappable input replaced rather than
   * raising an error. This codec setup previously sat in `configLines`, where it was out of
   * scope for the `Source.fromURL` call and therefore never used. The lines are read eagerly
   * and the source is closed before returning.
   */
  lazy val contentLines: Seq[String] =
    Option(originUrl) match {
      case Some(url) =>
        // A fresh Codec instance: the configuration calls mutate it, so the shared
        // Codec.UTF8 value must not be used here.
        implicit val codec: Codec = Codec("UTF-8")
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
        val source = scala.io.Source.fromURL(url)
        try source.getLines().toList
        finally source.close()
      // ideally this case should never happen unless the Config passed in argument is not parsed from a file
      case None => Seq.empty
    }

  // configuration file as a String
  lazy val content: String = contentLines.mkString(System.lineSeparator())

  // md5sum of the configuration content
  // NOTE(review): `getBytes` uses the platform default charset, so the checksum of non-ASCII
  // content can differ between machines. Left as is because changing it alters existing checksums.
  lazy val checksum: String = MessageDigest.getInstance("MD5").digest(content.getBytes).map("%02X".format(_)).mkString

  /**
   * All feature definitions under `path`, each paired with its parse result.
   *
   * Successfully parsed features sort after failed ones; within each group the order follows
   * the feature's position in the original file where it can be found.
   */
  def features(): Seq[FeatureDefinition] = {
    val builder = collection.mutable.ListBuffer.empty[FeatureDefinition]

    config.getObject(path).forEach(new BiConsumer[String, ConfigValue] {
      def accept(t: String, u: ConfigValue): Unit = {
        val parsedFeature = ModelFeature.parse(t, u.atKey(t), t)
        builder += (t -> parsedFeature)
      }
    })

    builder.toSeq.sortBy {
      case (_, Success(feature)) => (true, featureIndex(feature.feature), feature.group, feature.feature)
      case (f, Failure(_)) => (false, featureIndex(f), "", f)
    }
  }
}

Example 13

Source File: utf8.scala From libisabelle with Apache License 2.0

5 votes

package isabelle


import java.nio.charset.Charset
import scala.io.Codec


/** Permissive UTF-8 decoding over char sequences whose chars each carry one byte value. */
object UTF8
{
  // see also https://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /**
   * Decodes `text`, treating every char as one UTF-8 code unit.
   *
   * Chars below 128 are copied through. A sequence that is cut short, or that yields an
   * invalid code point, becomes U+FFFD. Chars matching none of the recognised lead or
   * continuation patterns are dropped.
   */
  def decode_permissive(text: CharSequence): String =
  {
    val out = new java.lang.StringBuilder(text.length)
    var acc = -1     // code point under construction; -1 means no sequence is open
    var pending = 0  // continuation units still expected for the open sequence

    // Emits the open sequence (or U+FFFD when it is incomplete/invalid) and resets the state.
    def flush(): Unit =
      if (acc != -1) {
        if (pending == 0 && Character.isValidCodePoint(acc)) out.appendCodePoint(acc)
        else out.append('\uFFFD')
        acc = -1
        pending = 0
      }

    // Starts a new sequence with payload bits `x`, expecting `n` continuation units.
    def init(x: Int, n: Int): Unit =
    {
      flush()
      acc = x
      pending = n
    }

    // Appends six payload bits; an unexpected continuation opens a sequence that can only fail.
    def push(x: Int): Unit =
      if (pending <= 0) init(x, -1)
      else {
        acc = (acc << 6) + x
        pending -= 1
      }

    val n = text.length
    var i = 0
    while (i < n) {
      val c = text.charAt(i)
      if (c < 128) { flush(); out.append(c) }
      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
      i += 1
    }
    flush()
    out.toString
  }

  // View of buffer[start, end) as a CharSequence of unsigned byte values.
  private class Decode_Chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
  {
    def length: Int = end - start
    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar
    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /**
   * Wraps `buffer[start, end)` as a CharSequence whose `toString` decodes the bytes
   * permissively and then applies `decode`.
   */
  def decode_chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int): CharSequence =
  {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}

Example 14

Source File: utf8.scala From libisabelle with Apache License 2.0

5 votes

package isabelle


import java.nio.charset.Charset
import scala.io.Codec


/** Permissive UTF-8 decoding over char sequences whose chars each carry one byte value. */
object UTF8
{
  // see also https://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /**
   * Decodes `text`, treating every char as one UTF-8 code unit.
   *
   * Chars below 128 are copied through. A sequence that is cut short, or that yields an
   * invalid code point, becomes U+FFFD. Chars matching none of the recognised lead or
   * continuation patterns are dropped.
   */
  def decode_permissive(text: CharSequence): String =
  {
    val out = new java.lang.StringBuilder(text.length)
    var acc = -1     // code point under construction; -1 means no sequence is open
    var pending = 0  // continuation units still expected for the open sequence

    // Emits the open sequence (or U+FFFD when it is incomplete/invalid) and resets the state.
    def flush(): Unit =
      if (acc != -1) {
        if (pending == 0 && Character.isValidCodePoint(acc)) out.appendCodePoint(acc)
        else out.append('\uFFFD')
        acc = -1
        pending = 0
      }

    // Starts a new sequence with payload bits `x`, expecting `n` continuation units.
    def init(x: Int, n: Int): Unit =
    {
      flush()
      acc = x
      pending = n
    }

    // Appends six payload bits; an unexpected continuation opens a sequence that can only fail.
    def push(x: Int): Unit =
      if (pending <= 0) init(x, -1)
      else {
        acc = (acc << 6) + x
        pending -= 1
      }

    val n = text.length
    var i = 0
    while (i < n) {
      val c = text.charAt(i)
      if (c < 128) { flush(); out.append(c) }
      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
      i += 1
    }
    flush()
    out.toString
  }

  // View of buffer[start, end) as a CharSequence of unsigned byte values.
  private class Decode_Chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
  {
    def length: Int = end - start
    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar
    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /**
   * Wraps `buffer[start, end)` as a CharSequence whose `toString` decodes the bytes
   * permissively and then applies `decode`.
   */
  def decode_chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int): CharSequence =
  {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}

Example 15

Source File: utf8.scala From libisabelle with Apache License 2.0

5 votes

package isabelle


import java.nio.charset.Charset
import scala.io.Codec


/** Permissive UTF-8 decoding over char sequences whose chars each carry one byte value. */
object UTF8
{
  // see also http://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /**
   * Decodes `text`, treating every char as one UTF-8 code unit.
   *
   * Chars below 128 are copied through. A sequence that is cut short, or that yields an
   * invalid code point, becomes U+FFFD. Chars matching none of the recognised lead or
   * continuation patterns are dropped.
   */
  def decode_permissive(text: CharSequence): String =
  {
    val out = new java.lang.StringBuilder(text.length)
    var acc = -1     // code point under construction; -1 means no sequence is open
    var pending = 0  // continuation units still expected for the open sequence

    // Emits the open sequence (or U+FFFD when it is incomplete/invalid) and resets the state.
    def flush(): Unit =
      if (acc != -1) {
        if (pending == 0 && Character.isValidCodePoint(acc)) out.appendCodePoint(acc)
        else out.append('\uFFFD')
        acc = -1
        pending = 0
      }

    // Starts a new sequence with payload bits `x`, expecting `n` continuation units.
    def init(x: Int, n: Int): Unit =
    {
      flush()
      acc = x
      pending = n
    }

    // Appends six payload bits; an unexpected continuation opens a sequence that can only fail.
    def push(x: Int): Unit =
      if (pending <= 0) init(x, -1)
      else {
        acc = (acc << 6) + x
        pending -= 1
      }

    val n = text.length
    var i = 0
    while (i < n) {
      val c = text.charAt(i)
      if (c < 128) { flush(); out.append(c) }
      else if ((c & 0xC0) == 0x80) push(c & 0x3F)
      else if ((c & 0xE0) == 0xC0) init(c & 0x1F, 1)
      else if ((c & 0xF0) == 0xE0) init(c & 0x0F, 2)
      else if ((c & 0xF8) == 0xF0) init(c & 0x07, 3)
      i += 1
    }
    flush()
    out.toString
  }

  // View of buffer[start, end) as a CharSequence of unsigned byte values.
  private class Decode_Chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int) extends CharSequence
  {
    def length: Int = end - start
    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar
    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /**
   * Wraps `buffer[start, end)` as a CharSequence whose `toString` decodes the bytes
   * permissively and then applies `decode`.
   */
  def decode_chars(decode: String => String,
    buffer: Array[Byte], start: Int, end: Int): CharSequence =
  {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}