scala.io.Codec Scala Examples
The following examples show how to use scala.io.Codec.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: constants.scala From Converter with GNU General Public License v3.0 | 5 votes |
package org.scalablytyped.converter.internal

import java.net.URI
import java.time.ZoneId
import java.time.format.DateTimeFormatter
import java.util.Locale

import org.scalablytyped.converter.internal.environment.OpSystem

import scala.io.Codec

/** Constants shared across the converter: default folders, upstream repository
  * locations, and the charset / timestamp conventions.
  */
object constants {

  /** Default cache folder, chosen from the operating system reported by `environment.OS`. */
  val defaultCacheFolder: os.Path = environment.OS match {
    case OpSystem.MAC     => os.home / "Library" / "Caches" / "ScalablyTyped"
    case OpSystem.WINDOWS => os.home / "AppData" / "Local" / "ScalablyTyped"
    case OpSystem.LINUX   => os.home / ".cache" / "scalablytyped"
    case OpSystem.UNKNOWN => os.home / ".cache" / "scalablytyped" // By default, Linux cache folder
  }

  /** `~/.ivy2/local` under the current user's home directory. */
  val defaultLocalPublishFolder: os.Path = os.home / ".ivy2" / "local"

  val DefinitelyTypedRepo: URI = new URI("https://github.com/DefinitelyTyped/DefinitelyTyped.git")
  val ConverterRepo: URI       = new URI("https://github.com/ScalablyTyped/Converter.git")

  /** True when the `CIRCLECI` environment variable is set (to any value). */
  val isCi: Boolean = sys.env.contains("CIRCLECI")

  val TimeZone: ZoneId = ZoneId.of("UTC")

  val Utf8: java.nio.charset.Charset = Codec.UTF8.charSet

  /** Compact UTC timestamp formatter.
    *
    * Uses `HH` (hour-of-day, 00-23). The previous `hh` is the 12-hour clock and,
    * with no AM/PM marker in the pattern, formatted 01:30 and 13:30 identically.
    */
  val DateTimePattern: DateTimeFormatter =
    DateTimeFormatter
      .ofPattern("yyyyMMddHHmm")
      .withLocale(Locale.ENGLISH)
      .withZone(TimeZone)
}
Example 2
Source File: JsonRequestSpec.scala From play-ws with Apache License 2.0 | 5 votes |
package play.api.libs.ws.ahc

import java.nio.charset.StandardCharsets

import akka.actor.ActorSystem
import akka.stream.Materializer
import akka.util.ByteString
import org.mockito.Mockito.times
import org.mockito.Mockito.verify
import org.mockito.Mockito.when
import org.specs2.mock.Mockito
import org.specs2.mutable.Specification
import org.specs2.specification.AfterAll
import play.api.libs.json.JsString
import play.api.libs.json.JsValue
import play.api.libs.json.Json
import play.api.libs.ws.JsonBodyReadables
import play.api.libs.ws.JsonBodyWritables
import play.libs.ws.DefaultObjectMapper
import play.shaded.ahc.org.asynchttpclient.Response

import scala.io.Codec

/** Checks that JSON bodies are written with the right content type and that JSON
  * responses are read back using the charset requested from the underlying response.
  */
class JsonRequestSpec extends Specification with Mockito with AfterAll with JsonBodyWritables {
  sequential

  implicit val system       = ActorSystem()
  implicit val materializer = Materializer.matFromSystem

  override def afterAll: Unit = {
    system.terminate()
  }

  /** Reads the `test.json` resource into one string (line breaks dropped) and always
    * closes the underlying source; previously the source was opened and never closed.
    *
    * NOTE(review): both reading examples decode the fixture as ISO-8859-1, including
    * the one titled "UTF-8" -- kept as-is, confirm this is intended.
    */
  private def readTestJson(): String = {
    val source = io.Source.fromResource("test.json")(Codec.ISO8859)
    try source.getLines().mkString
    finally source.close()
  }

  "set a json node" in {
    val jsValue = Json.obj("k1" -> JsString("v1"))
    val client  = mock[StandaloneAhcWSClient]
    val req = new StandaloneAhcWSRequest(client, "http://playframework.com/", null)
      .withBody(jsValue)
      .asInstanceOf[StandaloneAhcWSRequest]
      .buildRequest()

    req.getHeaders.get("Content-Type") must be_==("application/json")
    ByteString.fromArray(req.getByteData).utf8String must be_==("""{"k1":"v1"}""")
  }

  "set a json node using the default object mapper" in {
    val objectMapper = DefaultObjectMapper.instance

    implicit val jsonReadable = body(objectMapper)
    val jsonNode              = objectMapper.readTree("""{"k1":"v1"}""")
    val client                = mock[StandaloneAhcWSClient]
    val req = new StandaloneAhcWSRequest(client, "http://playframework.com/", null)
      .withBody(jsonNode)
      .asInstanceOf[StandaloneAhcWSRequest]
      .buildRequest()

    req.getHeaders.get("Content-Type") must be_==("application/json")
    ByteString.fromArray(req.getByteData).utf8String must be_==("""{"k1":"v1"}""")
  }

  "read an encoding of UTF-8" in {
    val json = readTestJson()

    val ahcResponse = mock[Response]
    val response    = new StandaloneAhcWSResponse(ahcResponse)

    when(ahcResponse.getResponseBody(StandardCharsets.UTF_8)).thenReturn(json)
    when(ahcResponse.getContentType).thenReturn("application/json")

    val value: JsValue = JsonBodyReadables.readableAsJson.transform(response)
    // The reader must have asked the response for a UTF-8 body exactly once.
    verify(ahcResponse, times(1)).getResponseBody(StandardCharsets.UTF_8)
    verify(ahcResponse, times(1)).getContentType
    value.toString must beEqualTo(json)
  }

  "read an encoding of ISO-8859-1" in {
    val json = readTestJson()

    val ahcResponse = mock[Response]
    val response    = new StandaloneAhcWSResponse(ahcResponse)

    when(ahcResponse.getResponseBody(StandardCharsets.ISO_8859_1)).thenReturn(json)
    when(ahcResponse.getContentType).thenReturn("application/json;charset=iso-8859-1")

    val value: JsValue = JsonBodyReadables.readableAsJson.transform(response)
    // The explicit charset in the content type must be the one used to fetch the body.
    verify(ahcResponse, times(1)).getResponseBody(StandardCharsets.ISO_8859_1)
    verify(ahcResponse, times(1)).getContentType
    value.toString must beEqualTo(json)
  }
}
Example 3
Source File: PrintUtils.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev

import java.io.PrintWriter

import scala.io.{Codec, Source}

/** Small helpers for writing text files, one value per line. */
object PrintUtils {

  /** Writes each element of `v` on its own line of `fileName` (overwriting it).
    *
    * @param fileName path of the file to create or truncate
    * @param v        values to write, rendered with `toString`
    */
  def columnVectorToFile(fileName: String, v: Array[Double]): Unit =
    withPrintWriter(fileName, writer => v.foreach(a => writer.println(a.toString)))

  /** Writes each string of `v` on its own line of `fileName` (overwriting it).
    *
    * The file is written as UTF-8 so it round-trips through
    * [[FileReadUtils.withLinesIterator]], which reads UTF-8. It used to be written
    * with the platform default charset.
    *
    * @param fileName path of the file to create or truncate
    * @param v        lines to write
    */
  def stringsToFile(fileName: String, v: Array[String]): Unit =
    withPrintWriter(fileName, writer => v.foreach(a => writer.println(a)))

  /** Opens `fileName` as a UTF-8 `PrintWriter`, runs `body` on it and closes the
    * writer afterwards -- also when `body` throws.
    *
    * @param fileName path of the file to create or truncate
    * @param body     callback that does the actual writing
    */
  def withPrintWriter(fileName: String, body: PrintWriter => Unit): Unit = {
    val writer = new PrintWriter(fileName, "UTF-8")
    try body(writer)
    finally writer.close()
  }
}

/** Helper for reading text files line by line. */
object FileReadUtils {

  /** Opens `fileName` as UTF-8, passes its line iterator to `body` and closes the
    * file afterwards -- also when `body` throws.
    *
    * The iterator is only valid inside `body`; materialise whatever is needed
    * (e.g. `_.toList`) before returning.
    *
    * @param fileName path of the file to read
    * @param body     computation over the lines of the file
    * @return whatever `body` returns
    */
  def withLinesIterator[T](fileName: String)(body: Iterator[String] => T): T = {
    val source = Source.fromFile(fileName)(Codec.UTF8)
    try body(source.getLines())
    finally source.close()
  }
}
Example 4
Source File: CsvSinkTests.scala From kantan.csv with Apache License 2.0 | 5 votes |
package kantan.csv

import java.io._

import laws.discipline.arbitrary._
import ops._
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks

import scala.io.Codec

/** Property tests: writing generated CSV rows to an output stream or a writer,
  * either row by row or in one call, yields the same text as `asCsv`.
  */
class CsvSinkTests extends AnyFunSuite with ScalaCheckPropertyChecks with Matchers {

  test("CSV data should be correctly written to an output stream (bit by bit)") {
    forAll(csv) { rows =>
      val sink      = new ByteArrayOutputStream()
      val csvWriter = sink.asCsvWriter[List[String]](rfc)

      // Feed the rows one at a time, then close the writer that results.
      rows.foldLeft(csvWriter)((acc, row) => acc write row).close()

      val written = new String(sink.toByteArray, Codec.UTF8.charSet)
      written should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to an output stream (in bulk)") {
    forAll(csv) { rows =>
      val sink = new ByteArrayOutputStream()
      sink.writeCsv(rows, rfc)

      val written = new String(sink.toByteArray, Codec.UTF8.charSet)
      written should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to a writer (bit by bit)") {
    forAll(csv) { rows =>
      val sink      = new StringWriter()
      val csvWriter = sink.asCsvWriter[List[String]](rfc)

      rows.foldLeft(csvWriter)((acc, row) => acc write row).close()

      sink.toString should be(rows.asCsv(rfc))
    }
  }

  test("CSV data should be correctly written to a writer (in bulk)") {
    forAll(csv) { rows =>
      val sink = new StringWriter()
      sink.writeCsv(rows, rfc)

      sink.toString should be(rows.asCsv(rfc))
    }
  }
}
Example 5
Source File: Report.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler
import org.apache.commons.io.FileUtils
import org.scalatest.tools.Runner
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.{Source, Codec}
import scala.reflect.io.File
import collection.JavaConversions._

// NOTE(review): this listing is truncated. The header of the enclosing type and of
// the method that contains the next three statements is not part of the snippet;
// presumably the type is `Report` (the object below extends it) -- confirm against
// the original file.

// Logs the ScalaTest runner arguments, runs the runner, then rewrites the report title.
log.info(s"run ${cmdArgs.mkString(" ")}")
Runner.run(cmdArgs)
changeTitle()
}

// Replaces the text "ScalaTest Results" in `<reportPath>/index.html` with `title`
// (defaults to `Report.title`) and writes the result back to the same file.
def changeTitle(title:String=Report.title): Unit ={
  val originTitle="ScalaTest Results"
  val indexFile=reportPath+"/index.html"
  // The whole file is read into memory using the default codec in scope.
  val newContent=Source.fromFile(indexFile).mkString.replace(originTitle, title)
  scala.reflect.io.File(indexFile).writeAll(newContent)
}
}

// Singleton holding mutable report settings and state.
object Report extends Report{
  var showCancel=false
  var title="AppCrawler"
  var master=""
  var candidate=""
  var reportDir=""
  var store=new URIElementStore

  // Reads `elementsFile` fully and parses its text with `DataObject.fromYaml`
  // into a `URIElementStore`.
  def loadResult(elementsFile: String): URIElementStore ={
    DataObject.fromYaml[URIElementStore](Source.fromFile(elementsFile).mkString)
  }
}
Example 6
Source File: HeaderCreator.scala From sbt-header with Apache License 2.0 | 5 votes |
package de.heikoseeberger.sbtheader

import java.io.InputStream

import sbt.Logger

import scala.io.Codec

/** Factory for [[HeaderCreator]], whose constructor is private. */
object HeaderCreator {

  def apply(
      fileType: FileType,
      commentStyle: CommentStyle,
      license: License,
      headerEmptyLine: Boolean,
      log: Logger,
      input: InputStream
  ): HeaderCreator =
    new HeaderCreator(fileType, commentStyle, license, headerEmptyLine, log, input)
}

/** Reads `input` completely (as UTF-8, closing it afterwards) on construction and
  * works out the text the file should have once its license header is created or
  * refreshed. `createText` exposes the result; `None` means nothing has to change.
  */
final class HeaderCreator private (
    fileType: FileType,
    commentStyle: CommentStyle,
    license: License,
    headerEmptyLine: Boolean,
    log: Logger,
    input: InputStream
) {

  private val crlf          = """(?s)(?:.*)(\r\n)(?:.*)""".r
  private val cr            = """(?s)(?:.*)(\r)(?:.*)""".r
  private val headerPattern = commentStyle.pattern

  /** Line separator found in `s`: CRLF first, then a lone CR, otherwise LF. */
  private def lineSeparatorOf(s: String): String =
    s match {
      case crlf(_) => "\r\n"
      case cr(_)   => "\r"
      case _       => "\n"
    }

  /** Removes whitespace at the very beginning of `s`. */
  private def trimLeft(s: String): String =
    s.replaceAll("""^\s+""", "")

  // Split the content into the optional "first line" matched by the file type's
  // pattern and the remaining text.
  private val (firstLine, text) = {
    val fileContent =
      try scala.io.Source.fromInputStream(input)(Codec.UTF8).mkString
      finally input.close()

    fileType.firstLinePattern
      .map { pattern =>
        fileContent match {
          case pattern(first, rest) => (first, rest)
          case other                => ("", other)
        }
      }
      .getOrElse(("", fileContent))
  }

  log.debug(s"First line of file is:$newLine$firstLine")
  log.debug(s"Text of file is:$newLine$text")

  private val fileNewLine = lineSeparatorOf(text)

  /** Renders the header for `license` and converts its line separators to the
    * ones used by the file.
    */
  private def newHeaderText(existingHeader: Option[String]) = {
    val suffix        = if (headerEmptyLine) "" else newLine
    val headerText    = commentStyle(license, existingHeader).stripSuffix(suffix)
    val headerNewLine = lineSeparatorOf(headerText)
    headerText.replace(headerNewLine, fileNewLine)
  }

  private val modifiedText = text match {
    case headerPattern(existingText, body) =>
      val replacement = newHeaderText(Some(existingText))
      // An up-to-date header means the file is left alone.
      if (replacement != existingText) Some(firstLine + replacement + trimLeft(body))
      else None
    case body if body.isEmpty =>
      None
    case body =>
      Some(firstLine + newHeaderText(None) + trimLeft(body))
  }

  log.debug(s"Modified text of file is:$newLine$modifiedText")

  /** The new file content, or `None` when the file does not need to be rewritten. */
  def createText: Option[String] =
    modifiedText
}
Example 7
Source File: ProcessBuilderUtils.scala From scalastringcourseday7 with Apache License 2.0 | 5 votes |
package util

import java.io.ByteArrayInputStream
import java.nio.charset.{Charset, CodingErrorAction}

import text.StringOption

import scala.collection.mutable.ListBuffer
import scala.io.{Codec, Source}
import scala.sys.process.ProcessBuilder

/** Implicit enrichment of `ProcessBuilder` with [[ProcessBuilderUtils]]. */
object ProcessBuilderUtils {
  implicit def processToProcessUtils(repr: ProcessBuilder): ProcessBuilderUtils =
    new ProcessBuilderUtils(repr)
}

class ProcessBuilderUtils(repr: ProcessBuilder) {

  /** Runs the process, collects its output lines and decodes them again with the
    * given charset and error-handling policy.
    *
    * Each line is trimmed, terminated with `\n` and turned back into bytes with the
    * platform default charset before being decoded with `encoding`.
    *
    * @param encoding              charset used to decode the collected bytes
    * @param onMalformedInput      action for malformed input
    * @param onUnmappableCharacter action for unmappable characters
    * @param replacementOpt        replacement string for decoding, applied when non-empty
    * @return the decoded lines
    */
  def lineStream(encoding: Charset,
                 onMalformedInput: CodingErrorAction,
                 onUnmappableCharacter: CodingErrorAction,
                 replacementOpt: StringOption): Iterator[String] = {
    val collected = ListBuffer.empty[Byte]
    repr.lineStream_!.iterator.foreach { rawLine =>
      val normalised: String = rawLine.trim.concat("\n")
      collected ++= normalised.getBytes
    }

    val decoder: Codec =
      Codec(encoding)
        .onMalformedInput(onMalformedInput)
        .onUnmappableCharacter(onUnmappableCharacter)
    if (replacementOpt.nonEmpty) {
      // Codec's configuration methods update the codec in place.
      decoder.decodingReplaceWith(replacementOpt.get)
    }

    val bytes = new ByteArrayInputStream(collected.toArray)
    Source.fromInputStream(bytes)(decoder).getLines
  }
}
Example 8
Source File: WeightedLevenshtein.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.annotators.spell.context

import com.github.liblevenshtein.transducer.{Candidate, ITransducer}
import com.johnsnowlabs.nlp.annotators.spell.context.parser.RegexParser

import scala.collection.mutable
import scala.io.Codec
import scala.math.min

/** Levenshtein distance with pluggable per-edit costs, plus helpers to load a
  * weight table from a file and to recover the edits between two strings.
  */
trait WeightedLevenshtein {

  /** Edit distance between `s11` and `s22` where every edit is priced by `cost`.
    *
    * Both strings are wrapped as `^…_` before comparison; inserts are priced on
    * two-character contexts with `Ɛ` standing for the empty side.
    *
    * @param s11  first string
    * @param s22  second string
    * @param cost price of turning its first argument into its second
    * @return the accumulated cost of the cheapest edit sequence
    */
  def levenshteinDist(s11: String, s22: String)(cost: (String, String) => Float): Float = {
    // cope with start of string
    val s1 = s"^${s11}_"
    val s2 = s"^${s22}_"

    val s1_ = s"_^${s11}_"
    val s2_ = s"_^${s22}_"

    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) =>
      if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f
    }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) =
        if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
        else {
          minimum(
            dist(j - 1)(i) + cost(s2_.substring(j - 1, j + 1), s1(i - 1) + "Ɛ"), //insert in s1
            dist(j)(i - 1) + cost(s2(j - 1) + "Ɛ", s1_.substring(i - 1, i + 1)), //insert in s2
            dist(j - 1)(i - 1) + cost(s2(j - 1).toString, s1(i - 1).toString)
          )
        }

    dist(s2.length)(s1.length)
  }

  /** [[levenshteinDist]] priced by a weight table; see `genCost` for the fallbacks. */
  def wLevenshteinDist(s1: String, s2: String, weights: Map[String, Map[String, Float]]): Float =
    levenshteinDist(s1, s2)(genCost(weights))

  /** Loads a weight table from a UTF-8 file with one `from|to|weight` entry per line.
    *
    * The file is closed when loading finishes, also on failure, and the result is a
    * plain strict immutable map (previously a lazy `mapValues` view was returned and
    * the file handle was never closed). A later entry for the same `from|to` pair
    * overrides an earlier one.
    *
    * @param filename path of the weights file
    * @return weights indexed by `from`, then by `to`
    * @throws java.lang.ArrayIndexOutOfBoundsException if a line has fewer than three fields
    * @throws java.lang.NumberFormatException          if a weight is not a valid float
    */
  def loadWeights(filename: String): Map[String, Map[String, Float]] = {
    val source = scala.io.Source.fromFile(filename)(Codec.UTF8)
    try {
      val weights = mutable.HashMap[String, Map[String, Float]]()
      source.getLines().foreach { line =>
        val lineFields = line.split("\\|")
        val row        = weights.getOrElse(lineFields(0), Map.empty[String, Float])
        weights.update(lineFields(0), row.updated(lineFields(1), lineFields(2).toFloat))
      }
      weights.toMap
    } finally source.close()
  }

  /** Cost lookup: the table entry if present, else 0 for identical strings, else 1. */
  private def genCost(weights: Map[String, Map[String, Float]])(a: String, b: String): Float = {
    if (weights.contains(a) && weights(a).contains(b))
      weights(a)(b)
    else if (a == b) {
      0.0f
    } else
      1.0f
  }

  private def minimum(i1: Float, i2: Float, i3: Float) = min(min(i1, i2), i3)

  /** Unit-cost edit distance table between `s1` and `s2`, back-tracked into the list
    * of `(from, to)` edits that differ.
    *
    * NOTE(review): the back-tracking indexes `s2(j - 1)` / `s1(i - 1)` and only stops
    * at `j == 1 && i == 1` on a match, so it appears to rely on both strings sharing
    * their first character -- confirm against callers.
    */
  def learnDist(s1: String, s2: String): Seq[(String, String)] = {
    val acc: Seq[(String, String)] = Seq.empty

    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) =>
      if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f
    }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) =
        if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
        else
          minimum(
            dist(j - 1)(i) + 1.0f,
            dist(j)(i - 1) + 1.0f,
            dist(j - 1)(i - 1) + 1.0f
          )

    backTrack(dist, s2, s1, s2.length, s1.length, acc)
  }

  /** Walks the distance table back from cell `(j, i)`, appending one `(from, to)`
    * pair to `acc` for every position where the strings differ.
    */
  def backTrack(dist: Array[Array[Float]], s2: String, s1: String, j: Int, i: Int,
                acc: Seq[(String, String)]): Seq[(String, String)] = {
    if (s2(j - 1) == s1(i - 1)) {
      if (j == 1 && i == 1)
        acc
      else
        backTrack(dist, s2, s1, j - 1, i - 1, acc)
    } else {
      // Keyed by distance: when two neighbours tie, the later entry replaces the earlier one.
      val pSteps = Map(
        dist(j - 1)(i)     -> ("", s2(j - 1).toString, j - 1, i),
        dist(j)(i - 1)     -> (s1(i - 1).toString, "", j, i - 1),
        dist(j - 1)(i - 1) -> (s1(i - 1).toString, s2(j - 1).toString, j - 1, i - 1)
      )

      val best = pSteps.minBy(_._1)._2
      backTrack(dist, s2, s1, best._3, best._4, acc :+ (best._1, best._2))
    }
  }
}
Example 9
Source File: InvokeMigrationHandler.scala From flyway-awslambda with MIT License | 5 votes |
package crossroad0201.aws.flywaylambda

import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter}

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client}

import scala.io.{BufferedSource, Codec}
import scala.util.{Failure, Success, Try}

/** Lambda entry point that runs a Flyway migration described by a JSON request of
  * the form `{"bucket_name": …, "prefix": …, "flyway_conf": …}`.
  */
class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase {
  type BucketName   = String
  type Prefix       = String
  type ConfFileName = String

  /** Parses the request, runs the migration and writes the outcome to `output`:
    * the migration result on success, the exception's `toString` on failure.
    *
    * @param input   JSON request body, decoded as UTF-8
    * @param output  stream receiving the result or the error text
    * @param context Lambda context; its logger is used for progress messages
    */
  override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = {

    // `flyway_conf` is optional and defaults to "flyway.conf"; the other two keys are mandatory.
    def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try {
      import spray.json._
      import DefaultJsonProtocol._

      val json  = new BufferedSource(input)(Codec("UTF-8")).mkString
      val jsObj = JsonParser(json).toJson.asJsObject
      jsObj.getFields("bucket_name", "prefix") match {
        case Seq(JsString(b), JsString(p)) =>
          jsObj.getFields("flyway_conf") match {
            case Seq(JsString(c)) => (b, p, c)
            case _                => (b, p, "flyway.conf")
          }
        // The message names the keys that are actually looked up above.
        case _ =>
          throw new IllegalArgumentException(s"Missing required key [bucket_name, prefix]. - $json")
      }
    }

    val logger = context.getLogger

    implicit val s3Client: AmazonS3 =
      new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION"))))

    (for {
      i <- parseInput
      _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") }
      r <- migrate(i._1, i._2, i._3)(context, s3Client)
    } yield r) match {
      case Success(r) =>
        logger.log(r)
        val b    = r.getBytes("UTF-8")
        val bout = new BufferedOutputStream(output)
        // Write the payload once. The previous `Stream.continually(bout.write(b))`
        // produced the same single write only because a Stream evaluates its head eagerly.
        bout.write(b)
        bout.flush()
      case Failure(e) =>
        e.printStackTrace()
        val w = new PrintWriter(output)
        w.write(e.toString)
        w.flush()
    }
  }
}
Example 10
Source File: IngestSpec.scala From CSYE7200_Old with MIT License | 5 votes |
package edu.neu.coe.csye7200.asstmd

import org.scalatest.{FlatSpec, Matchers}

import scala.io.{Codec, Source}
import scala.util._

/** Exercises `Ingest` on an in-memory source of integers and on the movie CSV resource. */
class IngestSpec extends FlatSpec with Matchers {

  behavior of "ingest"

  it should "work for Int" in {
    // Type-class instance that parses a line as an Int.
    trait IngestibleInt extends Ingestible[Int] {
      def fromString(w: String): Try[Int] = Try(w.toInt)
    }

    implicit object IngestibleInt extends IngestibleInt

    val source   = Source.fromChars(Array('x', '\n', '4', '2'))
    val ingester = new Ingest[Int]()
    val xys      = ingester(source).toSeq
    // TO BE IMPLEMENTED check that xys has exactly one element, consisting of Success(42) -- 10 points
  }

  it should "work for movie database" in {
    implicit val codec: Codec = Codec("UTF-8")

    // NOTE that you expect to see a number of exceptions thrown. That's OK. We expect that some lines will not parse correctly.
    Try(Source.fromResource("movie_metadata.csv")) match {
      case Failure(x) =>
        fail(x)

      case Success(source) =>
        val ingester = new Ingest[Movie]()

        // Successes pass through; failures are printed to stderr and kept as they are.
        val mys: Seq[Try[Movie]] =
          ingester(source)
            .map { my =>
              my.transform(
                m => Success(m),
                e => { System.err.println(e); my }
              )
            }
            .toSeq

        val mos: Seq[Option[Movie]] =
          mys.map(my => my.toOption.filter(m => m.production.country == "New Zealand"))

        val ms = mos.flatten
        ms.size shouldBe 4
        ms.foreach(m => println(m))
        source.close()
    }
  }
}
Example 11
Source File: IngestSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.asstmd

import org.scalatest.{FlatSpec, Matchers}

import scala.io.{Codec, Source}
import scala.util._

/** Exercises `Ingest` on an in-memory source of integers and on the movie CSV resource. */
class IngestSpec extends FlatSpec with Matchers {

  behavior of "ingest"

  it should "work for Int" in {
    // Type-class instance that parses a line as an Int.
    trait IngestibleInt extends Ingestible[Int] {
      def fromString(w: String): Try[Int] = Try(w.toInt)
    }

    implicit object IngestibleInt extends IngestibleInt

    val source   = Source.fromChars(Array('x', '\n', '4', '2'))
    val ingester = new Ingest[Int]()
    val xys      = ingester(source).toSeq
    // check that xys has exactly one element, consisting of Success(42) -- 10 points
    // TO BE IMPLEMENTED
    ???
  }

  it should "work for movie database" in {
    implicit val codec: Codec = Codec("UTF-8")

    // NOTE that you expect to see a number of exceptions thrown. That's OK. We expect that some lines will not parse correctly.
    Try(Source.fromResource("movie_metadata.csv")) match {
      case Failure(x) =>
        fail(x)

      case Success(source) =>
        val ingester = new Ingest[Movie]()

        // Successes pass through; failures are printed to stderr and kept as they are.
        val mys: Seq[Try[Movie]] =
          ingester(source)
            .map { my =>
              my.transform(
                m => Success(m),
                e => { System.err.println(e); my }
              )
            }
            .toSeq

        val mos: Seq[Option[Movie]] =
          mys.map(my => my.toOption.filter(m => m.production.country == "New Zealand"))

        val ms = mos.flatten
        ms.size shouldBe 4
        ms.foreach(m => println(m))
        source.close()
    }
  }
}
Example 12
Source File: ModelConfigurationParser.scala From modelmatrix with Apache License 2.0 | 5 votes |
package com.collective.modelmatrix

import java.nio.charset.CodingErrorAction
import java.security.MessageDigest
import java.util.function.BiConsumer

import com.typesafe.config.{Config, ConfigValue}

import scala.io.Codec
import scalaz.{Failure, Success, ValidationNel}

/** Parses the feature definitions found under `path` in `config` and exposes the raw
  * text of the configuration file together with its checksum.
  *
  * @param config configuration to read features from
  * @param path   key of the object holding the feature definitions
  */
class ModelConfigurationParser(config: Config, path: String = "features") {

  type FeatureDefinition = (String, ValidationNel[String, ModelFeature])

  // Lines of the configuration file paired with their zero-based position.
  private lazy val configLines: Seq[(String, Int)] =
    contentLines.zipWithIndex

  // Try to find feature row index in original config if possible
  private def featureIndex(f: String): Int =
    configLines.collectFirst { case (line, index) if line.contains(f) => index }.getOrElse(0)

  private[this] val originUrl = config.origin().url()

  /** Configuration file as lines; empty when the config has no origin URL, i.e. it
    * was not parsed from a file.
    *
    * The file is decoded as UTF-8 with malformed or unmappable input replaced, and
    * the source is closed once all lines have been read. Previously the lenient
    * codec was declared in `configLines`, where it was never picked up by the read
    * performed here, and the source was left open.
    */
  lazy val contentLines: Seq[String] =
    Option(originUrl).fold(Seq.empty[String]) { url =>
      val lenientUtf8 =
        Codec("UTF-8")
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)

      val source = scala.io.Source.fromURL(url)(lenientUtf8)
      // Materialise strictly: the lines must be read before the source is closed.
      try source.getLines().toList
      finally source.close()
    }

  // configuration file as a String
  lazy val content: String = contentLines.mkString(System.lineSeparator())

  // md5sum of the configuration content
  // NOTE(review): `getBytes` uses the platform default charset, so the checksum may
  // differ between platforms for non-ASCII content -- left unchanged to keep
  // existing checksums stable.
  lazy val checksum: String =
    MessageDigest.getInstance("MD5").digest(content.getBytes).map("%02X".format(_)).mkString

  /** All feature definitions, parse failures first, each group ordered by the
    * position of the feature in the original file.
    */
  def features(): Seq[FeatureDefinition] = {
    val builder = collection.mutable.ListBuffer.empty[FeatureDefinition]

    config.getObject(path).forEach(new BiConsumer[String, ConfigValue] {
      def accept(t: String, u: ConfigValue): Unit = {
        val parsedFeature = ModelFeature.parse(t, u.atKey(t), t)
        builder += (t -> parsedFeature)
      }
    })

    builder.toSeq.sortBy {
      case (f, Success(feature)) => (true, featureIndex(feature.feature), feature.group, feature.feature)
      case (f, Failure(_))       => (false, featureIndex(f), "", f)
    }
  }
}
Example 13
Source File: utf8.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.nio.charset.Charset

import scala.io.Codec

/** Permissive UTF-8 decoding of text whose characters each carry one byte value. */
object UTF8 {

  // see also https://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /** Decodes `text`, treating every character as one byte of a UTF-8 stream.
    *
    * Characters below 128 are copied unchanged. An incomplete sequence, a stray
    * continuation byte or an invalid code point becomes U+FFFD. Characters that
    * match none of the recognised byte patterns are skipped.
    */
  def decode_permissive(text: CharSequence): String = {
    val out       = new java.lang.StringBuilder(text.length)
    var pending   = -1 // code point being assembled; -1 means none
    var remaining = 0  // continuation bytes still expected

    def emitPending(): Unit =
      if (pending != -1) {
        if (remaining == 0 && Character.isValidCodePoint(pending)) out.appendCodePoint(pending)
        else out.append('\uFFFD')
        pending = -1
        remaining = 0
      }

    def begin(bits: Int, expected: Int): Unit = {
      emitPending()
      pending = bits
      remaining = expected
    }

    def extend(bits: Int): Unit =
      if (remaining > 0) {
        pending = (pending << 6) + bits
        remaining -= 1
      } else begin(bits, -1) // stray continuation byte: flushed later as U+FFFD

    val n = text.length
    var i = 0
    while (i < n) {
      text.charAt(i) match {
        case c if c < 128            => emitPending(); out.append(c)
        case c if (c & 0xC0) == 0x80 => extend(c & 0x3F)
        case c if (c & 0xE0) == 0xC0 => begin(c & 0x1F, 1)
        case c if (c & 0xF0) == 0xE0 => begin(c & 0x0F, 2)
        case c if (c & 0xF8) == 0xF0 => begin(c & 0x07, 3)
        case _                       => // no matching byte pattern: skipped
      }
      i += 1
    }
    emitPending()
    out.toString
  }

  /** View of `buffer(start until end)` as characters with values 0..255. */
  private class Decode_Chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int)
      extends CharSequence {

    def length: Int = end - start

    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar

    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /** Wraps a byte range as a `CharSequence` whose `toString` is permissively decoded
    * and then passed through `decode`.
    *
    * @throws java.lang.IllegalArgumentException unless `0 <= start <= end <= buffer.length`
    */
  def decode_chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int): CharSequence = {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}
Example 14
Source File: utf8.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.nio.charset.Charset

import scala.io.Codec

/** Permissive UTF-8 decoding of text whose characters each carry one byte value. */
object UTF8 {

  // see also https://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /** Decodes `text`, treating every character as one byte of a UTF-8 stream.
    *
    * Characters below 128 are copied unchanged. An incomplete sequence, a stray
    * continuation byte or an invalid code point becomes U+FFFD. Characters that
    * match none of the recognised byte patterns are skipped.
    */
  def decode_permissive(text: CharSequence): String = {
    val out       = new java.lang.StringBuilder(text.length)
    var pending   = -1 // code point being assembled; -1 means none
    var remaining = 0  // continuation bytes still expected

    def emitPending(): Unit =
      if (pending != -1) {
        if (remaining == 0 && Character.isValidCodePoint(pending)) out.appendCodePoint(pending)
        else out.append('\uFFFD')
        pending = -1
        remaining = 0
      }

    def begin(bits: Int, expected: Int): Unit = {
      emitPending()
      pending = bits
      remaining = expected
    }

    def extend(bits: Int): Unit =
      if (remaining > 0) {
        pending = (pending << 6) + bits
        remaining -= 1
      } else begin(bits, -1) // stray continuation byte: flushed later as U+FFFD

    val n = text.length
    var i = 0
    while (i < n) {
      text.charAt(i) match {
        case c if c < 128            => emitPending(); out.append(c)
        case c if (c & 0xC0) == 0x80 => extend(c & 0x3F)
        case c if (c & 0xE0) == 0xC0 => begin(c & 0x1F, 1)
        case c if (c & 0xF0) == 0xE0 => begin(c & 0x0F, 2)
        case c if (c & 0xF8) == 0xF0 => begin(c & 0x07, 3)
        case _                       => // no matching byte pattern: skipped
      }
      i += 1
    }
    emitPending()
    out.toString
  }

  /** View of `buffer(start until end)` as characters with values 0..255. */
  private class Decode_Chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int)
      extends CharSequence {

    def length: Int = end - start

    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar

    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /** Wraps a byte range as a `CharSequence` whose `toString` is permissively decoded
    * and then passed through `decode`.
    *
    * @throws java.lang.IllegalArgumentException unless `0 <= start <= end <= buffer.length`
    */
  def decode_chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int): CharSequence = {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}
Example 15
Source File: utf8.scala From libisabelle with Apache License 2.0 | 5 votes |
package isabelle

import java.nio.charset.Charset

import scala.io.Codec

/** Permissive UTF-8 decoding of text whose characters each carry one byte value. */
object UTF8 {

  // see also http://en.wikipedia.org/wiki/UTF-8#Description
  // overlong encodings enable byte-stuffing of low-ASCII

  /** Decodes `text`, treating every character as one byte of a UTF-8 stream.
    *
    * Characters below 128 are copied unchanged. An incomplete sequence, a stray
    * continuation byte or an invalid code point becomes U+FFFD. Characters that
    * match none of the recognised byte patterns are skipped.
    */
  def decode_permissive(text: CharSequence): String = {
    val out       = new java.lang.StringBuilder(text.length)
    var pending   = -1 // code point being assembled; -1 means none
    var remaining = 0  // continuation bytes still expected

    def emitPending(): Unit =
      if (pending != -1) {
        if (remaining == 0 && Character.isValidCodePoint(pending)) out.appendCodePoint(pending)
        else out.append('\uFFFD')
        pending = -1
        remaining = 0
      }

    def begin(bits: Int, expected: Int): Unit = {
      emitPending()
      pending = bits
      remaining = expected
    }

    def extend(bits: Int): Unit =
      if (remaining > 0) {
        pending = (pending << 6) + bits
        remaining -= 1
      } else begin(bits, -1) // stray continuation byte: flushed later as U+FFFD

    val n = text.length
    var i = 0
    while (i < n) {
      text.charAt(i) match {
        case c if c < 128            => emitPending(); out.append(c)
        case c if (c & 0xC0) == 0x80 => extend(c & 0x3F)
        case c if (c & 0xE0) == 0xC0 => begin(c & 0x1F, 1)
        case c if (c & 0xF0) == 0xE0 => begin(c & 0x0F, 2)
        case c if (c & 0xF8) == 0xF0 => begin(c & 0x07, 3)
        case _                       => // no matching byte pattern: skipped
      }
      i += 1
    }
    emitPending()
    out.toString
  }

  /** View of `buffer(start until end)` as characters with values 0..255. */
  private class Decode_Chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int)
      extends CharSequence {

    def length: Int = end - start

    def charAt(i: Int): Char = (buffer(start + i) & 0xFF).toChar

    def subSequence(i: Int, j: Int): CharSequence =
      new Decode_Chars(decode, buffer, start + i, start + j)

    // toString with adhoc decoding: abuse of CharSequence interface
    override def toString: String = decode(decode_permissive(this))
  }

  /** Wraps a byte range as a `CharSequence` whose `toString` is permissively decoded
    * and then passed through `decode`.
    *
    * @throws java.lang.IllegalArgumentException unless `0 <= start <= end <= buffer.length`
    */
  def decode_chars(decode: String => String, buffer: Array[Byte], start: Int, end: Int): CharSequence = {
    require(0 <= start && start <= end && end <= buffer.length)
    new Decode_Chars(decode, buffer, start, end)
  }
}