java.sql.Date Scala Examples
The following examples show how to use java.sql.Date in Scala. Each example is drawn from an open-source project; the source file, project, and license are noted above it.
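For orientation before the project examples: a java.sql.Date wraps an epoch-millisecond value but is meant to carry only the date portion. A minimal standalone sketch of the usual constructions (plain JDK API, no external libraries):

import java.sql.Date
import java.time.LocalDate

val d1: Date = Date.valueOf("2018-09-10")               // from a yyyy-MM-dd string
val d2: Date = Date.valueOf(LocalDate.of(2018, 9, 10))  // from a java.time.LocalDate
val back: LocalDate = d2.toLocalDate                    // and back to java.time
assert(d1 == d2 && back == LocalDate.of(2018, 9, 10))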
Example 1
Source File: SparkNarrowTest.scala, from spark-tools (Apache License 2.0)
package io.univalence

import java.net.URLClassLoader
import java.sql.Date

import io.univalence.centrifuge.Sparknarrow
import org.apache.spark.SparkConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.Encoders
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite

case class Person(name: String, age: Int, date: Date)

class SparknarrowTest extends FunSuite {

  val conf: SparkConf = new SparkConf()
  conf.setAppName("yo")
  conf.set("spark.sql.caseSensitive", "true")
  conf.setMaster("local[2]")

  implicit val ss: SparkSession = SparkSession.builder.config(conf).getOrCreate
  import ss.implicits._

  test("testBasicCC") {
    val classDef = Sparknarrow.basicCC(Encoders.product[Person].schema).classDef
    checkDefinition(classDef)
  }

  def checkDefinition(scalaCode: String): Unit = {
    //TODO do a version for 2.11 and 2.12
  }

  test("play with scala eval") {
    val code =
      """
      case class Tata(str: String)
      case class Toto(age: Int, tata: Tata)
      """
    checkDefinition(code)
    checkDefinition(code)
  }

  ignore("printSchema StructType") {
    val yo = StructType(
      Seq(
        StructField("name", StringType),
        StructField("tel", ArrayType(StringType))
      )
    )
    yo.printTreeString()
  }
}
Example 2
Source File: DateDecoderTest.scala, from avro4s (Apache License 2.0)
package com.sksamuel.avro4s.record.decoder

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.SchemaFor.TimestampNanosLogicalType
import com.sksamuel.avro4s.{AvroSchema, Decoder, SchemaFor}
import org.apache.avro.generic.GenericData
import org.apache.avro.{LogicalTypes, SchemaBuilder}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

//noinspection ScalaDeprecation
class DateDecoderTest extends AnyFunSuite with Matchers {

  case class WithLocalTime(z: LocalTime)
  case class WithLocalDate(z: LocalDate)
  case class WithDate(z: Date)
  case class WithLocalDateTime(z: LocalDateTime)
  case class WithTimestamp(z: Timestamp)
  case class WithInstant(z: Instant)

  test("decode int to LocalTime") {
    val schema = AvroSchema[WithLocalTime]
    val record = new GenericData.Record(schema)
    record.put("z", 46245000000L)
    Decoder[WithLocalTime].decode(record) shouldBe WithLocalTime(LocalTime.of(12, 50, 45))
  }

  test("decode int to LocalDate") {
    val schema = AvroSchema[WithLocalDate]
    val record = new GenericData.Record(schema)
    record.put("z", 17784)
    Decoder[WithLocalDate].decode(record) shouldBe WithLocalDate(LocalDate.of(2018, 9, 10))
  }

  test("decode int to java.sql.Date") {
    val schema = AvroSchema[WithDate]
    val record = new GenericData.Record(schema)
    record.put("z", 17784)
    Decoder[WithDate].decode(record) shouldBe WithDate(Date.valueOf(LocalDate.of(2018, 9, 10)))
  }

  test("decode timestamp-millis to LocalDateTime") {
    val dateSchema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376L)
    Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe
      WithLocalDateTime(LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000000))
  }

  test("decode timestamp-micros to LocalDateTime") {
    val dateSchema = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376001L)
    Decoder[WithLocalDateTime].withSchema(SchemaFor(schema)).decode(record) shouldBe
      WithLocalDateTime(LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376001000))
  }

  test("decode timestamp-nanos to LocalDateTime") {
    val dateSchema = TimestampNanosLogicalType.addToSchema(SchemaBuilder.builder.longType)
    val schema = SchemaBuilder.record("foo").fields().name("z").`type`(dateSchema).noDefault().endRecord()
    val record = new GenericData.Record(schema)
    record.put("z", 1572707106376000002L)
    Decoder[WithLocalDateTime].decode(record) shouldBe
      WithLocalDateTime(LocalDateTime.of(2019, 11, 2, 15, 5, 6, 376000002))
  }

  test("decode long to Timestamp") {
    val schema = AvroSchema[WithTimestamp]
    val record = new GenericData.Record(schema)
    record.put("z", 1538312231000L)
    Decoder[WithTimestamp].decode(record) shouldBe WithTimestamp(new Timestamp(1538312231000L))
  }

  test("decode long to Instant") {
    val schema = AvroSchema[WithInstant]
    val record = new GenericData.Record(schema)
    record.put("z", 1538312231000L)
    Decoder[WithInstant].decode(record) shouldBe WithInstant(Instant.ofEpochMilli(1538312231000L))
  }
}
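The literal 17784 used by these tests is the epoch-day count for 2018-09-10 (Avro's date logical type stores days since 1970-01-01). A standalone check, independent of avro4s:

import java.time.LocalDate

assert(LocalDate.of(2018, 9, 10).toEpochDay == 17784L)
assert(LocalDate.ofEpochDay(17784L) == LocalDate.of(2018, 9, 10))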
Example 3
Source File: BigQueryUtilsSpec.scala, from comet-data-pipeline (Apache License 2.0)
package com.ebiznext.comet.utils.conversion

import java.sql.{Date, Timestamp}

import com.ebiznext.comet.TestHelper
import com.ebiznext.comet.config.SparkEnv
import com.ebiznext.comet.utils.conversion.BigQueryUtils._
import com.ebiznext.comet.utils.conversion.syntax._
import org.apache.spark.sql.SparkSession
import com.google.cloud.bigquery.{Field, StandardSQLTypeName, Schema => BQSchema}

class BigQueryUtilsSpec extends TestHelper {
  new WithSettings() {
    val sparkEnv: SparkEnv = new SparkEnv("test")
    val session: SparkSession = sparkEnv.session
    import session.implicits._

    "Spark Types" should "be converted to corresponding BQ Types" in {
      val res: BQSchema = List(
        (
          1,
          true,
          2.5,
          "hello",
          'x'.asInstanceOf[Byte],
          new Date(System.currentTimeMillis()),
          new Timestamp(System.currentTimeMillis())
        )
      ).toDF().to[BQSchema]
      //Schema{fields=[Field{name=value, type=INTEGER, mode=NULLABLE, description=, policyTags=null}]}
      val fields = List(
        Field.newBuilder("_1", StandardSQLTypeName.INT64).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_2", StandardSQLTypeName.BOOL).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_3", StandardSQLTypeName.FLOAT64).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_4", StandardSQLTypeName.STRING).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_5", StandardSQLTypeName.INT64).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_6", StandardSQLTypeName.DATE).setDescription("").setMode(Field.Mode.NULLABLE).build(),
        Field.newBuilder("_7", StandardSQLTypeName.TIMESTAMP).setDescription("").setMode(Field.Mode.NULLABLE).build()
      )
      res.getFields should contain theSameElementsInOrderAs fields
    }
  }
}
Example 4
Source File: SchemaColumnSelection.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import scala.reflect.runtime.universe.TypeTag
import java.sql.{Date, Timestamp}

import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{rand, udf}

case class SchemaColumnSelection[T](override val name: String, values: List[T])(implicit tag: TypeTag[T]) extends SchemaColumn {
  override def column(rowID: Option[Column] = None): Column = {
    val intToSelectionUDF = udf((index: Int) => {
      values(index)
    })
    intToSelectionUDF(rand() * values.length % values.length)
  }
}

object SchemaColumnSelectionProtocol extends SchemaColumnSelectionProtocol
trait SchemaColumnSelectionProtocol extends YamlParserProtocol {
  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnSelectionFormat extends YamlFormat[SchemaColumnSelection[_]] {

    override def read(yaml: YamlValue): SchemaColumnSelection[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set"))
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val values = fields.getOrElse(YamlString("values"), deserializationError("selection values not set"))

      dataType match {
        case SchemaColumnDataType.Int => SchemaColumnSelection(name, values.convertTo[List[Int]])
        case SchemaColumnDataType.Long => SchemaColumnSelection(name, values.convertTo[List[Long]])
        case SchemaColumnDataType.Float => SchemaColumnSelection(name, values.convertTo[List[Float]])
        case SchemaColumnDataType.Double => SchemaColumnSelection(name, values.convertTo[List[Double]])
        case SchemaColumnDataType.Date => SchemaColumnSelection(name, values.convertTo[List[Date]])
        case SchemaColumnDataType.Timestamp => SchemaColumnSelection(name, values.convertTo[List[Timestamp]])
        case SchemaColumnDataType.String => SchemaColumnSelection(name, values.convertTo[List[String]])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Selection}")
      }
    }

    override def write(obj: SchemaColumnSelection[_]): YamlValue = ???
  }
}
Example 5
Source File: SchemaColumnRandom.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}

import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{to_utc_timestamp, round, rand, from_unixtime, to_date}
import org.apache.spark.sql.types.{IntegerType, LongType}

trait SchemaColumnRandom[T] extends SchemaColumn

object SchemaColumnRandom {
  val FloatDP = 3
  val DoubleDP = 3

  def apply(name: String, min: Int, max: Int): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Long, max: Long): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Float, max: Float): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Double, max: Double): SchemaColumn = SchemaColumnRandomNumeric(name, min, max)
  def apply(name: String, min: Date, max: Date): SchemaColumn = SchemaColumnRandomDate(name, min, max)
  def apply(name: String, min: Timestamp, max: Timestamp): SchemaColumn = SchemaColumnRandomTimestamp(name, min, max)
  def apply(name: String): SchemaColumn = SchemaColumnRandomBoolean(name)
}

private case class SchemaColumnRandomNumeric[T: Numeric](override val name: String, min: T, max: T) extends SchemaColumnRandom[T] {
  override def column(rowID: Option[Column] = None): Column = {
    import Numeric.Implicits._
    (min, max) match {
      case (_: Int, _: Int) => round(rand() * (max - min) + min, 0).cast(IntegerType)
      case (_: Long, _: Long) => round(rand() * (max - min) + min, 0).cast(LongType)
      case (_: Float, _: Float) => round(rand() * (max - min) + min, SchemaColumnRandom.FloatDP)
      case (_: Double, _: Double) => round(rand() * (max - min) + min, SchemaColumnRandom.DoubleDP)
    }
  }
}

private case class SchemaColumnRandomTimestamp(override val name: String, min: Timestamp, max: Timestamp) extends SchemaColumnRandom[Timestamp] {
  override def column(rowID: Option[Column] = None): Column = {
    val minTime = min.getTime / 1000
    val maxTime = max.getTime / 1000
    to_utc_timestamp(from_unixtime(rand() * (maxTime - minTime) + minTime), "UTC")
  }
}

private case class SchemaColumnRandomDate(override val name: String, min: Date, max: Date) extends SchemaColumnRandom[Date] {
  val timestamp = SchemaColumnRandomTimestamp(name, new Timestamp(min.getTime), new Timestamp(max.getTime + 86400000))
  override def column(rowID: Option[Column] = None): Column = to_date(timestamp.column())
}

private case class SchemaColumnRandomBoolean(override val name: String) extends SchemaColumnRandom[Boolean] {
  override def column(rowID: Option[Column] = None): Column = rand() < 0.5f
}

object SchemaColumnRandomProtocol extends SchemaColumnRandomProtocol
trait SchemaColumnRandomProtocol extends YamlParserProtocol {
  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnRandomFormat extends YamlFormat[SchemaColumnRandom[_]] {

    override def read(yaml: YamlValue): SchemaColumnRandom[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name"))

      if (dataType == SchemaColumnDataType.Boolean) {
        SchemaColumnRandomBoolean(name)
      } else {
        val min = fields.getOrElse(YamlString("min"), deserializationError(s"min not set for $name"))
        val max = fields.getOrElse(YamlString("max"), deserializationError(s"max not set for $name"))

        dataType match {
          case SchemaColumnDataType.Int => SchemaColumnRandomNumeric(name, min.convertTo[Int], max.convertTo[Int])
          case SchemaColumnDataType.Long => SchemaColumnRandomNumeric(name, min.convertTo[Long], max.convertTo[Long])
          case SchemaColumnDataType.Float => SchemaColumnRandomNumeric(name, min.convertTo[Float], max.convertTo[Float])
          case SchemaColumnDataType.Double => SchemaColumnRandomNumeric(name, min.convertTo[Double], max.convertTo[Double])
          case SchemaColumnDataType.Date => SchemaColumnRandomDate(name, min.convertTo[Date], max.convertTo[Date])
          case SchemaColumnDataType.Timestamp => SchemaColumnRandomTimestamp(name, min.convertTo[Timestamp], max.convertTo[Timestamp])
          case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Random}")
        }
      }
    }

    override def write(obj: SchemaColumnRandom[_]): YamlValue = ???
  }
}
Example 6
Source File: SchemaColumnSequential.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}

import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.{to_utc_timestamp, from_unixtime, monotonically_increasing_id, to_date}

trait SchemaColumnSequential[T] extends SchemaColumn

object SchemaColumnSequential {
  def apply(name: String, start: Int, step: Int): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Long, step: Long): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Float, step: Float): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Double, step: Double): SchemaColumn = SchemaColumnSequentialNumeric(name, start, step)
  def apply(name: String, start: Date, step: Int): SchemaColumn = SchemaColumnSequentialDate(name, start, step)
  def apply(name: String, start: Timestamp, step: Int): SchemaColumn = SchemaColumnSequentialTimestamp(name, start, step)
}

private case class SchemaColumnSequentialNumeric[T: Numeric](override val name: String, start: T, step: T) extends SchemaColumnSequential[T] {
  override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = (rowID.get * step) + start
}

private case class SchemaColumnSequentialTimestamp(override val name: String, start: Timestamp, stepSeconds: Int) extends SchemaColumnSequential[Timestamp] {
  override def column(rowID: Option[Column] = Some(monotonically_increasing_id)): Column = {
    val startTime = start.getTime / 1000
    to_utc_timestamp(from_unixtime(rowID.get * stepSeconds + startTime), "UTC")
  }
}

private case class SchemaColumnSequentialDate(override val name: String, start: Date, stepDays: Int) extends SchemaColumnSequential[Date] {
  val timestamp = SchemaColumnSequentialTimestamp(name, new Timestamp(start.getTime), stepDays * 86400)
  override def column(rowID: Option[Column]): Column = to_date(timestamp.column())
}

object SchemaColumnSequentialProtocol extends SchemaColumnSequentialProtocol
trait SchemaColumnSequentialProtocol extends YamlParserProtocol {
  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnSequentialFormat extends YamlFormat[SchemaColumnSequential[_]] {

    override def read(yaml: YamlValue): SchemaColumnSequential[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError("data_type not set"))
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val start = fields.getOrElse(YamlString("start"), deserializationError("start not set"))
      val step = fields.getOrElse(YamlString("step"), deserializationError("step not set"))

      dataType match {
        case "Int" => SchemaColumnSequentialNumeric(name, start.convertTo[Int], step.convertTo[Int])
        case "Long" => SchemaColumnSequentialNumeric(name, start.convertTo[Long], step.convertTo[Long])
        case "Float" => SchemaColumnSequentialNumeric(name, start.convertTo[Float], step.convertTo[Float])
        case "Double" => SchemaColumnSequentialNumeric(name, start.convertTo[Double], step.convertTo[Double])
        case "Date" => SchemaColumnSequentialDate(name, start.convertTo[Date], step.convertTo[Int])
        case "Timestamp" => SchemaColumnSequentialTimestamp(name, start.convertTo[Timestamp], step.convertTo[Int])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Sequential}")
      }
    }

    override def write(obj: SchemaColumnSequential[_]): YamlValue = ???
  }
}
Example 7
Source File: SchemaColumnFixed.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}

import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.lit

case class SchemaColumnFixed[T](override val name: String, value: T) extends SchemaColumn {
  override def column(rowID: Option[Column] = None): Column = lit(value)
}

object SchemaColumnFixedProtocol extends SchemaColumnFixedProtocol
trait SchemaColumnFixedProtocol extends YamlParserProtocol {
  import net.jcazevedo.moultingyaml._

  implicit object SchemaColumnFixedFormat extends YamlFormat[SchemaColumnFixed[_]] {

    override def read(yaml: YamlValue): SchemaColumnFixed[_] = {
      val fields = yaml.asYamlObject.fields
      val YamlString(name) = fields.getOrElse(YamlString("name"), deserializationError("name not set"))
      val YamlString(dataType) = fields.getOrElse(YamlString("data_type"), deserializationError(s"data_type not set for $name"))
      val value = fields.getOrElse(YamlString("value"), deserializationError(s"value not set for $name"))

      dataType match {
        case SchemaColumnDataType.Int => SchemaColumnFixed(name, value.convertTo[Int])
        case SchemaColumnDataType.Long => SchemaColumnFixed(name, value.convertTo[Long])
        case SchemaColumnDataType.Float => SchemaColumnFixed(name, value.convertTo[Float])
        case SchemaColumnDataType.Double => SchemaColumnFixed(name, value.convertTo[Double])
        case SchemaColumnDataType.Date => SchemaColumnFixed(name, value.convertTo[Date])
        case SchemaColumnDataType.Timestamp => SchemaColumnFixed(name, value.convertTo[Timestamp])
        case SchemaColumnDataType.String => SchemaColumnFixed(name, value.convertTo[String])
        case SchemaColumnDataType.Boolean => SchemaColumnFixed(name, value.convertTo[Boolean])
        case _ => deserializationError(s"unsupported data_type: $dataType for ${SchemaColumnType.Fixed}")
      }
    }

    override def write(obj: SchemaColumnFixed[_]): YamlValue = ???
  }
}
Example 8
Source File: YamlParserTest.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker

import java.sql.{Date, Timestamp}

import org.scalatest.{MustMatchers, WordSpec}

class YamlParserTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._
  import net.jcazevedo.moultingyaml._

  "YamlParser" must {
    "convert a YamlDate to java.sql.Date" in {
      val date = "1998-06-03"
      val string = s"""$date""".stripMargin
      string.parseYaml.convertTo[Date] mustBe Date.valueOf(date)
    }

    "convert a YamlDate to java.sql.Timestamp" in {
      val timestamp = "1998-06-03 01:23:45"
      val string = s"""$timestamp""".stripMargin
      string.parseYaml.convertTo[Timestamp] mustBe Timestamp.valueOf(timestamp)
    }
  }
}
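Both conversions bottom out in the JDBC escape formats that the valueOf parsers expect: yyyy-[m]m-[d]d for Date and yyyy-[m]m-[d]d hh:mm:ss[.f...] for Timestamp. A minimal standalone check:

import java.sql.{Date, Timestamp}

assert(Date.valueOf("1998-06-03") == Date.valueOf(java.time.LocalDate.of(1998, 6, 3)))
assert(Timestamp.valueOf("1998-06-03 01:23:45").toLocalDateTime.getHour == 1)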
Example 9
Source File: SchemaColumnFixedTest.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}

import org.scalatest.{MustMatchers, WordSpec}

class SchemaColumnFixedTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnFixedProtocol._
  import net.jcazevedo.moultingyaml._

  val name = "test"
  val column_type = "Fixed"
  val baseString =
    s"""name: $name
       |column_type: $column_type
     """.stripMargin

  "SchemaColumnFixed" must {
    "read an Int column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Int}
           |value: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1)
    }

    "read a Long column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Long}
           |value: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1L)
    }

    "read a Float column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Float}
           |value: 1.0
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1f)
    }

    "read a Double column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Double}
           |value: 1.0
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, 1d)
    }

    "read a Date column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Date}
           |value: 1998-06-03
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Date.valueOf("1998-06-03"))
    }

    "read a Timestamp column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Timestamp}
           |value: 1998-06-03 01:23:45
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, Timestamp.valueOf("1998-06-03 01:23:45"))
    }

    "read a String column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.String}
           |value: test
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, "test")
    }

    "read a Boolean column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Boolean}
           |value: true
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnFixed[_]] mustBe SchemaColumnFixed(name, true)
    }
  }
}
Example 10
Source File: SchemaColumnSequentialTest.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker.schema.table.columns

import java.sql.{Date, Timestamp}

import org.scalatest.{MustMatchers, WordSpec}

class SchemaColumnSequentialTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.schema.table.columns.SchemaColumnSequentialProtocol._
  import net.jcazevedo.moultingyaml._

  val name = "test"
  val column_type = "Sequential"
  val baseString =
    s"""name: $name
       |column_type: $column_type
     """.stripMargin

  "SchemaColumnSequential" must {
    "read an Int column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Int}
           |start: 1
           |step: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1, 1)
    }

    "read a Long column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Long}
           |start: 1
           |step: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1L, 1L)
    }

    "read a Float column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Float}
           |start: 1.0
           |step: 1.0
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1f, 1f)
    }

    "read a Double column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Double}
           |start: 1.0
           |step: 1.0
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, 1d, 1d)
    }

    "read a Date column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Date}
           |start: 1998-06-03
           |step: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Date.valueOf("1998-06-03"), 1)
    }

    "read a Timestamp column" in {
      val string =
        s"""$baseString
           |data_type: ${SchemaColumnDataType.Timestamp}
           |start: 1998-06-03 01:23:45
           |step: 1
         """.stripMargin
      string.parseYaml.convertTo[SchemaColumnSequential[_]] mustBe SchemaColumnSequential(name, Timestamp.valueOf("1998-06-03 01:23:45"), 1)
    }
  }
}
Example 11
Source File: ArgsParserTest.scala, from data-faker (MIT License)
package com.dunnhumby.datafaker

import java.sql.{Date, Timestamp}

import org.scalatest.{MustMatchers, WordSpec}

class ArgsParserTest extends WordSpec with MustMatchers {

  import com.dunnhumby.datafaker.YamlParser.YamlParserProtocol._
  import net.jcazevedo.moultingyaml._

  "ArgsParser" must {
    "accepts --file arg" in {
      ArgsParser.parseArgs(List("--file", "test")) mustBe Map("file" -> "test")
    }

    "accepts --database arg" in {
      ArgsParser.parseArgs(List("--database", "test")) mustBe Map("database" -> "test")
    }
  }
}
Example 12
Source File: literals.scala, from iolap (Apache License 2.0)
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._

object Literal {
  def apply(v: Any): Literal = v match {
    case i: Int => Literal(i, IntegerType)
    case l: Long => Literal(l, LongType)
    case d: Double => Literal(d, DoubleType)
    case f: Float => Literal(f, FloatType)
    case b: Byte => Literal(b, ByteType)
    case s: Short => Literal(s, ShortType)
    case s: String => Literal(UTF8String(s), StringType)
    case b: Boolean => Literal(b, BooleanType)
    case d: BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
    case d: java.math.BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
    case d: Decimal => Literal(d, DecimalType.Unlimited)
    case t: Timestamp => Literal(t, TimestampType)
    case d: Date => Literal(DateUtils.fromJavaDate(d), DateType)
    case a: Array[Byte] => Literal(a, BinaryType)
    case null => Literal(null, NullType)
    case _ => throw new RuntimeException("Unsupported literal type " + v.getClass + " " + v)
  }

  def create(v: Any, dataType: DataType): Literal = {
    Literal(CatalystTypeConverters.convertToCatalyst(v), dataType)
  }
}

case class Literal protected (value: Any, dataType: DataType) extends LeafExpression {

  override def foldable: Boolean = true
  override def nullable: Boolean = value == null
  override def toString: String = if (value != null) value.toString else "null"

  type EvaluatedType = Any
  override def eval(input: Row): Any = value
}

// TODO: Specialize
case class MutableLiteral(var value: Any, dataType: DataType, nullable: Boolean = true) extends LeafExpression {

  type EvaluatedType = Any

  def update(expression: Expression, input: Row): Unit = {
    value = expression.eval(input)
  }

  override def eval(input: Row): Any = value
}
Example 13
Source File: DateUtils.scala, from iolap (Apache License 2.0)
package org.apache.spark.sql.catalyst.util

import java.sql.Date
import java.text.SimpleDateFormat
import java.util.{Calendar, TimeZone}

import org.apache.spark.sql.catalyst.expressions.Cast

object DateUtils {
  private val MILLIS_PER_DAY = 86400000

  // Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
  private val LOCAL_TIMEZONE = new ThreadLocal[TimeZone] {
    override protected def initialValue: TimeZone = {
      Calendar.getInstance.getTimeZone
    }
  }

  private def javaDateToDays(d: Date): Int = {
    millisToDays(d.getTime)
  }

  // we should use the exact day as Int, for example, (year, month, day) -> day
  def millisToDays(millisLocal: Long): Int = {
    ((millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal)) / MILLIS_PER_DAY).toInt
  }

  private def toMillisSinceEpoch(days: Int): Long = {
    val millisUtc = days.toLong * MILLIS_PER_DAY
    millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc)
  }

  def fromJavaDate(date: java.sql.Date): Int = {
    javaDateToDays(date)
  }

  def toJavaDate(daysSinceEpoch: Int): java.sql.Date = {
    new java.sql.Date(toMillisSinceEpoch(daysSinceEpoch))
  }

  def toString(days: Int): String = Cast.threadLocalDateFormat.get.format(toJavaDate(days))

  def stringToTime(s: String): java.util.Date = {
    if (!s.contains('T')) {
      // JDBC escape string
      if (s.contains(' ')) {
        java.sql.Timestamp.valueOf(s)
      } else {
        java.sql.Date.valueOf(s)
      }
    } else if (s.endsWith("Z")) {
      // this is zero timezone of ISO8601
      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
    } else if (s.indexOf("GMT") == -1) {
      // timezone with ISO8601
      val inset = "+00.00".length
      val s0 = s.substring(0, s.length - inset)
      val s1 = s.substring(s.length - inset, s.length)
      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
        stringToTime(s0 + "GMT" + s1)
      } else {
        stringToTime(s0 + ".0GMT" + s1)
      }
    } else {
      // ISO8601 with GMT insert
      val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
      ISO8601GMT.parse(s)
    }
  }
}
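The fromJavaDate/toJavaDate pair is intentionally lossy: a date maps to a whole number of days in the JVM's local time zone, so any time-of-day component is dropped. A small sketch of the intended round-trip invariant (assuming the default time zone does not change between the two calls):

import java.sql.Date

val d = Date.valueOf("2015-06-15")
val days = DateUtils.fromJavaDate(d)  // days since the epoch, in the local time zone
assert(DateUtils.toJavaDate(days).toString == "2015-06-15")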
Example 14
Source File: DataFrameDateSuite.scala, from iolap (Apache License 2.0)
package org.apache.spark.sql

import java.sql.{Date, Timestamp}

class DataFrameDateTimeSuite extends QueryTest {

  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
  import ctx.implicits._

  test("timestamp comparison with date strings") {
    val df = Seq(
      (1, Timestamp.valueOf("2015-01-01 00:00:00")),
      (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t")

    checkAnswer(
      df.select("t").filter($"t" <= "2014-06-01"),
      Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil)

    checkAnswer(
      df.select("t").filter($"t" >= "2014-06-01"),
      Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil)
  }

  test("date comparison with date strings") {
    val df = Seq(
      (1, Date.valueOf("2015-01-01")),
      (2, Date.valueOf("2014-01-01"))).toDF("i", "t")

    checkAnswer(
      df.select("t").filter($"t" <= "2014-06-01"),
      Row(Date.valueOf("2014-01-01")) :: Nil)

    checkAnswer(
      df.select("t").filter($"t" >= "2015"),
      Row(Date.valueOf("2015-01-01")) :: Nil)
  }
}
Example 15
Source File: Encoders.scala, from quill (Apache License 2.0)
package io.getquill.context.jdbc

import java.sql.{ Date, Timestamp, Types }
import java.time.{ LocalDate, LocalDateTime }
import java.util.{ Calendar, TimeZone }
import java.{ sql, util }

trait Encoders {
  this: JdbcContextBase[_, _] =>

  type Encoder[T] = JdbcEncoder[T]

  protected val dateTimeZone = TimeZone.getDefault

  case class JdbcEncoder[T](sqlType: Int, encoder: BaseEncoder[T]) extends BaseEncoder[T] {
    override def apply(index: Index, value: T, row: PrepareRow) =
      encoder(index + 1, value, row)
  }

  def encoder[T](sqlType: Int, f: (Index, T, PrepareRow) => Unit): Encoder[T] =
    JdbcEncoder(sqlType, (index: Index, value: T, row: PrepareRow) => {
      f(index, value, row)
      row
    })

  def encoder[T](sqlType: Int, f: PrepareRow => (Index, T) => Unit): Encoder[T] =
    encoder(sqlType, (index: Index, value: T, row: PrepareRow) => f(row)(index, value))

  implicit def mappedEncoder[I, O](implicit mapped: MappedEncoding[I, O], e: Encoder[O]): Encoder[I] =
    JdbcEncoder(e.sqlType, mappedBaseEncoder(mapped, e.encoder))

  private[this] val nullEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setNull)

  implicit def optionEncoder[T](implicit d: Encoder[T]): Encoder[Option[T]] =
    JdbcEncoder(
      d.sqlType,
      (index, value, row) =>
        value match {
          case Some(v) => d.encoder(index, v, row)
          case None    => nullEncoder.encoder(index, d.sqlType, row)
        }
    )

  implicit val stringEncoder: Encoder[String] = encoder(Types.VARCHAR, _.setString)
  implicit val bigDecimalEncoder: Encoder[BigDecimal] =
    encoder(Types.NUMERIC, (index, value, row) => row.setBigDecimal(index, value.bigDecimal))
  implicit val byteEncoder: Encoder[Byte] = encoder(Types.TINYINT, _.setByte)
  implicit val shortEncoder: Encoder[Short] = encoder(Types.SMALLINT, _.setShort)
  implicit val intEncoder: Encoder[Int] = encoder(Types.INTEGER, _.setInt)
  implicit val longEncoder: Encoder[Long] = encoder(Types.BIGINT, _.setLong)
  implicit val floatEncoder: Encoder[Float] = encoder(Types.FLOAT, _.setFloat)
  implicit val doubleEncoder: Encoder[Double] = encoder(Types.DOUBLE, _.setDouble)
  implicit val byteArrayEncoder: Encoder[Array[Byte]] = encoder(Types.VARBINARY, _.setBytes)
  implicit val dateEncoder: Encoder[util.Date] =
    encoder(Types.TIMESTAMP, (index, value, row) =>
      row.setTimestamp(index, new sql.Timestamp(value.getTime), Calendar.getInstance(dateTimeZone)))
  implicit val localDateEncoder: Encoder[LocalDate] =
    encoder(Types.DATE, (index, value, row) =>
      row.setDate(index, Date.valueOf(value), Calendar.getInstance(dateTimeZone)))
  implicit val localDateTimeEncoder: Encoder[LocalDateTime] =
    encoder(Types.TIMESTAMP, (index, value, row) =>
      row.setTimestamp(index, Timestamp.valueOf(value), Calendar.getInstance(dateTimeZone)))
}
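Every temporal encoder above passes a Calendar built from dateTimeZone, which is how JDBC is told which time zone the wall-clock value should be interpreted in. The same pattern in plain JDBC, as a hypothetical sketch (the connection and the events table are assumptions, not part of quill):

import java.sql.{Connection, Date}
import java.util.{Calendar, TimeZone}

def insertEventDate(conn: Connection): Unit = {
  val ps = conn.prepareStatement("INSERT INTO events (d) VALUES (?)")
  // Interpret the date in UTC rather than the JVM default zone
  ps.setDate(1, Date.valueOf("2018-09-10"), Calendar.getInstance(TimeZone.getTimeZone("UTC")))
  ps.executeUpdate()
  ps.close()
}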
Example 16
Source File: DateEncoderTest.scala, from avro4s (Apache License 2.0)
package com.sksamuel.avro4s.record.encoder

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.{AvroSchema, DefaultFieldMapper, Encoder, ImmutableRecord}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

//noinspection ScalaDeprecation
class DateEncoderTest extends AnyFunSuite with Matchers {

  test("encode LocalTime as TIME-MILLIS") {
    case class Foo(s: LocalTime)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalTime.of(12, 50, 45))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(46245000000L)))
  }

  test("encode LocalDate as DATE") {
    case class Foo(s: LocalDate)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalDate.of(2018, 9, 10))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784)))
  }

  test("encode java.sql.Date as DATE") {
    case class Foo(s: Date)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Date.valueOf(LocalDate.of(2018, 9, 10)))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(17784)))
  }

  test("encode LocalDateTime as timestamp-nanos") {
    case class Foo(s: LocalDateTime)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000000123L)))
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 123009))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739000123009L)))
    Encoder[Foo].encode(Foo(LocalDateTime.of(2018, 9, 10, 11, 58, 59, 328187943))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1536580739328187943L)))
  }

  test("encode Timestamp as TIMESTAMP-MILLIS") {
    case class Foo(s: Timestamp)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Timestamp.from(Instant.ofEpochMilli(1538312231000L)))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L)))
  }

  test("encode Instant as TIMESTAMP-MILLIS") {
    case class Foo(s: Instant)
    val schema = AvroSchema[Foo]
    Encoder[Foo].encode(Foo(Instant.ofEpochMilli(1538312231000L))) shouldBe
      ImmutableRecord(schema, Vector(java.lang.Long.valueOf(1538312231000L)))
  }
}
Example 17
Source File: NetezzaFilters.scala, from spark-netezza (Apache License 2.0)
package com.ibm.spark.netezza

import java.sql.{Date, Timestamp}

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.sources._

// (The enclosing object and its quoteValue helper, which renders String/Date/Timestamp
// values as quoted SQL literals, are elided in this excerpt.)

  def generateFilterExpr(f: Filter): Option[String] = {
    Option(f match {
      case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}"
      case EqualNullSafe(attr, value) =>
        s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " +
          s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))"
      case LessThan(attr, value) => s"$attr < ${quoteValue(value)}"
      case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}"
      case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}"
      case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}"
      case IsNull(attr) => s"$attr IS NULL"
      case IsNotNull(attr) => s"$attr IS NOT NULL"
      case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'"
      case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'"
      case StringContains(attr, value) => s"${attr} LIKE '%${value}%'"
      case In(attr, value) => s"$attr IN (${quoteValue(value)})"
      case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null)
      case Or(f1, f2) =>
        val or = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (or.size == 2) {
          or.map(p => s"($p)").mkString(" OR ")
        } else {
          null
        }
      case And(f1, f2) =>
        val and = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (and.size == 2) {
          and.map(p => s"($p)").mkString(" AND ")
        } else {
          null
        }
      case _ => null
    })
  }
}
Example 18
Source File: DateSchemaTest.scala, from avro4s (Apache License 2.0)
package com.sksamuel.avro4s.schema

import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime}

import com.sksamuel.avro4s.AvroSchema
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class DateSchemaTest extends AnyFunSuite with Matchers {

  test("generate date logical type for LocalDate") {
    case class LocalDateTest(date: LocalDate)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdate.json"))
    val schema = AvroSchema[LocalDateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate date logical type for Date") {
    case class DateTest(date: Date)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/date.json"))
    val schema = AvroSchema[DateTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate time logical type for LocalTime") {
    case class LocalTimeTest(time: LocalTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localtime.json"))
    val schema = AvroSchema[LocalTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-nanos for LocalDateTime") {
    case class LocalDateTimeTest(time: LocalDateTime)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/localdatetime.json"))
    val schema = AvroSchema[LocalDateTimeTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Instant") {
    case class InstantTest(instant: Instant)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/instant.json"))
    val schema = AvroSchema[InstantTest]
    schema.toString(true) shouldBe expected.toString(true)
  }

  test("generate timestamp-millis logical type for Timestamp") {
    case class TimestampTest(ts: Timestamp)
    val expected = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream("/timestamp.json"))
    val schema = AvroSchema[TimestampTest]
    schema.toString(true) shouldBe expected.toString(true)
  }
}
Example 19
Source File: TypeCast.scala, from mimir (Apache License 2.0)
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType =>
        Try(datum.toFloat)
          .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType =>
        // was datum.toFloat, which silently lost precision for doubles
        Try(datum.toDouble)
          .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
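Because castTo is private[spreadsheet], it can only be exercised from within that package; there, date and timestamp strings simply delegate to the valueOf parsers. A usage sketch under that assumption:

import java.sql.{Date, Timestamp}
import org.apache.spark.sql.types.{DateType, TimestampType}

assert(TypeCast.castTo("2018-09-10", DateType) == Date.valueOf("2018-09-10"))
assert(TypeCast.castTo("2018-09-10 12:30:00", TimestampType) == Timestamp.valueOf("2018-09-10 12:30:00"))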
Example 20
Source File: MimirUDF.scala, from mimir (Apache License 2.0)
package mimir.exec.spark.udf

import java.sql.{ Timestamp, Date }

import org.apache.spark.sql.types.{ DataType, StructType, StructField }

import mimir.algebra._
import mimir.exec.spark._
import mimir.util.SparkUtils

class MimirUDF {

  def getPrimitive(t: Type, value: Any) = value match {
    case null => NullPrimitive()
    case _ => t match {
      //case TInt() => IntPrimitive(value.asInstanceOf[Long])
      case TInt() => IntPrimitive(value.asInstanceOf[Long])
      case TFloat() => FloatPrimitive(value.asInstanceOf[Double])
      case TDate() => SparkUtils.convertDate(value.asInstanceOf[Date])
      case TTimestamp() => SparkUtils.convertTimestamp(value.asInstanceOf[Timestamp])
      case TString() => StringPrimitive(value.asInstanceOf[String])
      case TBool() => BoolPrimitive(value.asInstanceOf[Boolean])
      case TRowId() => RowIdPrimitive(value.asInstanceOf[String])
      case TType() => TypePrimitive(Type.fromString(value.asInstanceOf[String]))
      //case TAny() => NullPrimitive()
      //case TUser(name) => name.toLowerCase
      //case TInterval() => Primitive(value.asInstanceOf[Long])
      case _ => StringPrimitive(value.asInstanceOf[String])
    }
  }

  def getNative(primitive: PrimitiveValue): AnyRef = primitive match {
    case NullPrimitive() => null
    case RowIdPrimitive(s) => s
    case StringPrimitive(s) => s
    case IntPrimitive(i) => new java.lang.Long(i)
    case FloatPrimitive(f) => new java.lang.Double(f)
    case BoolPrimitive(b) => new java.lang.Boolean(b)
    case ts @ TimestampPrimitive(y, m, d, h, mm, s, ms) => SparkUtils.convertTimestamp(ts)
    case dt @ DatePrimitive(y, m, d) => SparkUtils.convertDate(dt)
    case x => x.asString
  }

  def getStructType(datatypes: Seq[DataType]): StructType = {
    StructType(datatypes.map(dti => StructField("", RAToSpark.getInternalSparkType(dti), true)))
  }
}
Example 21
Source File: Tables.scala, from s4ds (Apache License 2.0)
import java.sql.Date

import scala.slick.driver.MySQLDriver.simple._

object Tables {

  class Transactions(tag: Tag) extends Table[Transaction](tag, "transactions") {
    def id = column[Int]("id", O.PrimaryKey, O.AutoInc)
    def candidate = column[String]("candidate")
    def contributor = column[String]("contributor")
    def contributorState = column[String]("contributor_state", O.DBType("VARCHAR(2)"))
    def contributorOccupation = column[Option[String]]("contributor_occupation")
    def amount = column[Long]("amount")
    def date = column[Date]("date")

    def * = (id.?, candidate, contributor, contributorState, contributorOccupation, amount, date) <> (
      Transaction.tupled, Transaction.unapply)
  }

  val transactions = TableQuery[Transactions]
}
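With the mapping in place, the Date column participates in Slick's lifted queries like any other column. A hypothetical usage sketch (Slick 2.x syntax to match the import above; an implicit session is assumed):

import java.sql.Date
import scala.slick.driver.MySQLDriver.simple._

def transactionsSince(cutoff: Date)(implicit session: Session): List[Transaction] =
  Tables.transactions
    .filter(_.date >= cutoff)  // compiles to SQL: WHERE date >= ?
    .sortBy(_.date.desc)
    .list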
Example 22
Source File: FECData.scala, from s4ds (Apache License 2.0)
import java.io.File
import java.sql.Date
import java.text.SimpleDateFormat

import com.github.tototoshi.csv._

object FECData {

  val DataDirectory = "./data/"

  // dd = day of month, MMM = abbreviated month, yy = two-digit year.
  // (Uppercase DD/YY would mean day-of-year/week-year and parse incorrectly.)
  private val dateParser = new SimpleDateFormat("dd-MMM-yy")

  private def load(fileName: String): FECData = {
    val reader = CSVReader.open(new File(DataDirectory + fileName))
    val transactions = for {
      row <- reader.iteratorWithHeaders
      id = None
      candidate = row("candidate")
      contributor = row("contributor_name")
      state = row("contributor_state")
      occupation = row("contributor_occupation") match {
        case "" => None
        case v => Some(v)
      }
      amount = (row("amount").toDouble * 100).toInt
      date = new Date(dateParser.parse(row("date")).getTime)
    } yield Transaction(id, candidate, contributor, state, occupation, amount, date)

    new FECData(transactions)
  }

  def loadAll: FECData = load("us.csv")
  def loadOhio: FECData = load("ohio.csv")
}

class FECData(val transactions: Iterator[Transaction])
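SimpleDateFormat pattern letters are case-sensitive, and the parsed java.util.Date is converted to java.sql.Date through getTime, as above. A standalone sketch of that conversion (assuming an English default locale for the month abbreviation):

import java.sql.Date
import java.text.SimpleDateFormat

val parser = new SimpleDateFormat("dd-MMM-yy")
val parsed = new Date(parser.parse("03-JUN-98").getTime)
assert(parsed.toString == "1998-06-03")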
Example 23
Source File: SlickDemo.scala, from s4ds (Apache License 2.0)
import java.sql.Date

import scala.slick.driver.MySQLDriver.simple._
import scala.slick.jdbc.meta._

object SlickDemo {

  val db = Database.forURL(
    "jdbc:mysql://127.0.0.1:3306/test",
    driver = "com.mysql.jdbc.Driver"
  )

  def createTable: Unit = {
    db withSession { implicit session =>
      if (!MTable.getTables("transactions").list.isEmpty) {
        Tables.transactions.ddl.drop
      }
      Tables.transactions.ddl.create
    }
  }

  def insertFecData: Unit = {
    val fecData = FECData.loadOhio
    val transactions = fecData.transactions
    db withSession { implicit session =>
      val transactionBatches = transactions.grouped(100000)
      for (batch <- transactionBatches) {
        Tables.transactions ++= batch.toList
      }
    }
  }

  def queryData: List[Transaction] = {
    db withSession { implicit session =>
      Tables.transactions.take(5).list
    }
  }

  def queryTotalDonations: Map[String, Long] = {
    db withSession { implicit session =>
      val grouped = Tables.transactions.groupBy(_.candidate)
      val aggregated = grouped.map {
        case (candidate, group) => (candidate -> group.map(_.amount).sum)
      }
      aggregated.list.toMap.mapValues { _.getOrElse(0L) }
    }
  }

  def main(args: Array[String]) {
    createTable
    insertFecData
    queryData.foreach { println }
    queryTotalDonations.foreach { println }
  }
}
Example 24
Source File: DataFramePrettyPrinter.scala, from lighthouse (Apache License 2.0)
package be.dataminded.lighthouse.testing

import java.sql.Date

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.catalyst.util.DateTimeUtils

private[testing] object DataFramePrettyPrinter {

  def prettyPrintDataFrame(df: DataFrame, number: Int, truncate: Int = 20): String = {
    val numRows = number.max(0)
    val takeResult = df.take(numRows + 1)
    val hasMoreData = takeResult.length > numRows
    val data = takeResult.take(numRows)

    val header = df.schema.fieldNames.toSeq

    def asReadableRows = {
      data.map { row =>
        row.toSeq.map { cell =>
          val str = cell match {
            case null => "null"
            case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
            case array: Array[_] => array.mkString("[", ", ", "]")
            case seq: Seq[_] => seq.mkString("[", ", ", "]")
            case d: Date => DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d))
            case _ => cell.toString
          }
          if (truncate > 0 && str.length > truncate) {
            // do not show ellipses for strings shorter than 4 characters.
            if (truncate < 4) str.substring(0, truncate)
            else str.substring(0, truncate - 3) + "..."
          } else {
            str
          }
        }: Seq[String]
      }
    }

    // For array values, replace Seq and Array with square brackets
    // For cells that are beyond `truncate` characters, replace it with the
    // first `truncate-3` and "..."
    val rows: Seq[Seq[String]] = header +: asReadableRows
    val sb = new StringBuilder

    // Initialise the width of each column to a minimum value of '3'
    val colWidths = Array.fill(header.length)(3)

    // Compute the width of each column
    for (row <- rows) {
      for ((cell, i) <- row.zipWithIndex) {
        colWidths(i) = math.max(colWidths(i), cell.length)
      }
    }

    // Create SeparateLine
    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

    // column names
    rows.head.zipWithIndex
      .map { case (cell, i) =>
        if (truncate > 0) {
          StringUtils.leftPad(cell, colWidths(i))
        } else {
          StringUtils.rightPad(cell, colWidths(i))
        }
      }
      .addString(sb, "|", "|", "|\n")

    sb.append(sep)

    // data
    rows.tail.map {
      _.zipWithIndex
        .map { case (cell, i) =>
          if (truncate > 0) {
            StringUtils.leftPad(cell.toString, colWidths(i))
          } else {
            StringUtils.rightPad(cell.toString, colWidths(i))
          }
        }
        .addString(sb, "|", "|", "|\n")
    }

    sb.append(sep)

    // For Data that has more than "numRows" records
    if (hasMoreData) {
      val rowsString = if (numRows == 1) "row" else "rows"
      sb.append(s"only showing top $numRows $rowsString\n")
    }

    sb.toString()
  }
}
Example 25
Source File: InsertMysqlDemo.scala, from spark_mysql (Apache License 2.0)
import java.sql.{Date, Timestamp}

import InsertMysqlDemo.CardMember
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import utils.MySQLUtils

/**
  * Created with IntelliJ IDEA.
  * Author: [email protected]
  * Description: save data from a DataFrame into MySQL
  * Date: Created in 2018-11-17 12:39
  */
object InsertMysqlDemo {

  case class CardMember(m_id: String, card_type: String, expire: Timestamp, duration: Int, is_sale: Boolean, date: Date, user: Long, salary: Float)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName(getClass.getSimpleName).set("spark.testing.memory", "3147480000")
    val sparkContext = new SparkContext(conf)
    val hiveContext = new SQLContext(sparkContext)
    import hiveContext.implicits._

    val memberSeq = Seq(
      CardMember("member_2", "monthly card", new Timestamp(System.currentTimeMillis()), 31, false, new Date(System.currentTimeMillis()), 123223, 0.32f),
      CardMember("member_1", "quarterly card", new Timestamp(System.currentTimeMillis()), 93, false, new Date(System.currentTimeMillis()), 124224, 0.362f)
    )
    val memberDF = memberSeq.toDF()

    MySQLUtils.saveDFtoDBCreateTableIfNotExist("member_test", memberDF)
    MySQLUtils.insertOrUpdateDFtoDBUsePool("member_test", memberDF, Array("user", "salary"))
    MySQLUtils.getDFFromMysql(hiveContext, "", null)

    sparkContext.stop()
  }
}
Example 26
Source File: Utils.scala, from hbase-connectors (Apache License 2.0)
package org.apache.hadoop.hbase.spark.datasources

import java.sql.{Date, Timestamp}

import org.apache.hadoop.hbase.spark.AvroSerdes
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object Utils {

  def hbaseFieldToScalaType(
      f: Field,
      src: Array[Byte],
      offset: Int,
      length: Int): Any = {
    if (f.exeSchema.isDefined) {
      // If we have avro schema defined, use it to get record, and then convert them to catalyst data type
      val m = AvroSerdes.deserialize(src, f.exeSchema.get)
      val n = f.avroToCatalyst.map(_(m))
      n.get
    } else {
      // Fall back to atomic type
      f.dt match {
        case BooleanType => src(offset) != 0
        case ByteType => src(offset)
        case ShortType => Bytes.toShort(src, offset)
        case IntegerType => Bytes.toInt(src, offset)
        case LongType => Bytes.toLong(src, offset)
        case FloatType => Bytes.toFloat(src, offset)
        case DoubleType => Bytes.toDouble(src, offset)
        case DateType => new Date(Bytes.toLong(src, offset))
        case TimestampType => new Timestamp(Bytes.toLong(src, offset))
        case StringType => UTF8String.fromBytes(src, offset, length)
        case BinaryType =>
          val newArray = new Array[Byte](length)
          System.arraycopy(src, offset, newArray, 0, length)
          newArray
        // TODO: SparkSqlSerializer.deserialize[Any](src)
        case _ => throw new Exception(s"unsupported data type ${f.dt}")
      }
    }
  }

  // convert input to data type
  def toBytes(input: Any, field: Field): Array[Byte] = {
    if (field.schema.isDefined) {
      // Here we assume the top level type is structType
      val record = field.catalystToAvro(input)
      AvroSerdes.serialize(record, field.schema.get)
    } else {
      field.dt match {
        case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean])
        case ByteType => Array(input.asInstanceOf[Number].byteValue)
        case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue)
        case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue)
        case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue)
        case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue)
        case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue)
        case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime)
        case StringType => Bytes.toBytes(input.toString)
        case BinaryType => input.asInstanceOf[Array[Byte]]
        case _ => throw new Exception(s"unsupported data type ${field.dt}")
      }
    }
  }
}
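For DateType the bytes on the wire are just the big-endian epoch milliseconds, so a write/read round trip through the HBase Bytes utility reproduces the original value. A minimal sketch:

import java.sql.Date
import org.apache.hadoop.hbase.util.Bytes

val original = Date.valueOf("2018-09-10")
val encoded: Array[Byte] = Bytes.toBytes(original.getTime)  // 8-byte big-endian long
val decoded = new Date(Bytes.toLong(encoded, 0))
assert(decoded.getTime == original.getTime)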
Example 27
Source File: TypeCast.scala, from spark-google-spreadsheets (Apache License 2.0)
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType =>
        Try(datum.toFloat)
          .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType =>
        // was datum.toFloat, which silently lost precision for doubles
        Try(datum.toDouble)
          .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
Example 28
Source File: DateColumnBuffer.scala, from spark-vector (Apache License 2.0)
package com.actian.spark_vector.colbuffer.singles

import java.nio.ByteBuffer
import java.sql.Date
import java.util.Calendar

import com.actian.spark_vector.colbuffer._
import com.actian.spark_vector.vector.VectorDataType

private class DateColumnBuffer(p: ColumnBufferBuildParams)
    extends ColumnBuffer[Date, Int](p.name, p.maxValueCount, DateSize, DateSize, p.nullable) {
  private final val DaysBeforeEpoch = 719528
  private final val JulianBoundary = 578101
  private final val CenturyDays = 36524

  override def put(source: Date, buffer: ByteBuffer): Unit = {
    val cal = Calendar.getInstance
    cal.set(source.getYear, source.getMonth, source.getDate())
    val dayOfYear = cal.get(Calendar.DAY_OF_YEAR)
    val year = source.getYear + 1900
    // Need to convert to proleptic gregorian calendar date
    var days = (year * 365) + ((year - 1) / 4) - (year / 100) + (year / 400) + dayOfYear
    // Need to adjust for error in Jan-Feb of certain century years
    if (year % 100 == 0 && year % 400 != 0 && dayOfYear < 61) days += 1
    buffer.putInt(days)
  }

  override def get(buffer: ByteBuffer): Int = {
    val days = buffer.getInt()
    var offset = 0
    // Need to convert from proleptic gregorian to julian if date before 1582/10/14
    if (days < JulianBoundary) {
      val n = (days - 366) / CenturyDays
      offset = n - (n / 4 + 2)
      // Need to adjust for error in Jan-Feb of certain century years
      val cdays = days % CenturyDays
      val qdays = days % (CenturyDays * 4)
      if (qdays > 365 && cdays < 366 && cdays > (59 + n / 4)) {
        offset += 1
      }
    }
    days - DaysBeforeEpoch + offset
  }
}

private[colbuffer] object DateColumnBuffer extends ColumnBufferBuilder {
  override private[colbuffer] val build: PartialFunction[ColumnBufferBuildParams, ColumnBuffer[_, _]] =
    ofDataType(VectorDataType.DateType) andThen { new DateColumnBuffer(_) }
}
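The DaysBeforeEpoch constant can be sanity-checked against the formula in put: for 1970-01-01 (year 1970, dayOfYear 1), the proleptic day count is 1970 * 365 + 1969/4 - 1970/100 + 1970/400 + 1 = 719050 + 492 - 19 + 4 + 1 = 719528, so the Unix epoch itself serializes as day zero. A worked check:

val year = 1970
val dayOfYear = 1
val days = (year * 365) + ((year - 1) / 4) - (year / 100) + (year / 400) + dayOfYear
assert(days == 719528)     // matches DaysBeforeEpoch
assert(days - 719528 == 0) // 1970-01-01 is stored as day 0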
Example 29
Source File: RddToDataFrame.scala, from spark-sframe (BSD 2-Clause "Simplified" License)
package org.apache.spark.turi

import org.graphlab.create.GraphLabUtil
import org.apache.spark.sql.{SQLContext, Row, DataFrame}
import org.apache.spark.rdd.RDD

import scala.collection.JavaConversions._
import org.apache.spark.sql.types._
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.ArrayBuffer
import scala.collection.immutable.Map
import java.util.HashMap
import java.util.ArrayList
import java.util.{Date, GregorianCalendar}
import java.sql.Date

object EvaluateRDD {

  def inferSchema(obj: Any): DataType = {
    if (obj.isInstanceOf[Int]) {
      IntegerType
    } else if (obj.isInstanceOf[String]) {
      StringType
    } else if (obj.isInstanceOf[Double]) {
      DoubleType
    } else if (obj.isInstanceOf[Long]) {
      LongType
    } else if (obj.isInstanceOf[Float]) {
      FloatType
    } else if (obj.isInstanceOf[Map[_, _]]) {
      MapType(inferSchema(obj.asInstanceOf[Map[_, _]].head._1), inferSchema(obj.asInstanceOf[Map[_, _]].head._2))
    } else if (obj.isInstanceOf[java.util.HashMap[_, _]]) {
      MapType(inferSchema(obj.asInstanceOf[java.util.HashMap[_, _]].head._1), inferSchema(obj.asInstanceOf[java.util.HashMap[_, _]].head._2))
    } else if (obj.isInstanceOf[Array[_]]) {
      ArrayType(inferSchema(obj.asInstanceOf[Array[_]](0)))
    } else if (obj.isInstanceOf[java.util.ArrayList[_]]) {
      ArrayType(inferSchema(obj.asInstanceOf[java.util.ArrayList[_]](0)))
    } else if (obj.isInstanceOf[java.util.GregorianCalendar]) {
      TimestampType
    } else if (obj.isInstanceOf[java.util.Date] || obj.isInstanceOf[java.sql.Date]) {
      DateType
    } else {
      StringType
    }
  }

  def toScala(obj: Any): Any = {
    if (obj.isInstanceOf[java.util.HashMap[_, _]]) {
      val jmap = obj.asInstanceOf[java.util.HashMap[_, _]]
      jmap.map { case (k, v) => toScala(k) -> toScala(v) }.toMap
    } else if (obj.isInstanceOf[java.util.ArrayList[_]]) {
      val buf = ArrayBuffer[Any]()
      val jArray = obj.asInstanceOf[java.util.ArrayList[_]]
      for (item <- jArray) {
        buf += toScala(item)
      }
      buf.toArray
    } else if (obj.isInstanceOf[java.util.GregorianCalendar]) {
      new java.sql.Timestamp(obj.asInstanceOf[java.util.GregorianCalendar].getTime().getTime())
    } else {
      obj
    }
  }

  def toSparkDataFrame(sqlContext: SQLContext, rdd: RDD[java.util.HashMap[String, _]]): DataFrame = {
    val scalaRDD = rdd.map(l => toScala(l))
    val rowRDD = scalaRDD.map(l => Row.fromSeq(l.asInstanceOf[Map[_, _]].values.toList))

    var sample_data: java.util.HashMap[String, _] = rdd.take(1)(0)

    var schema_list: ListBuffer[StructField] = new ListBuffer[StructField]()
    for ((name, v) <- sample_data) {
      schema_list.append(StructField(name, inferSchema(v)))
    }
    sqlContext.createDataFrame(rowRDD, StructType(schema_list))
  }
}
Example 30
Source File: PredicatePushdownSuite.scala From spark-exasol-connector with Apache License 2.0 | 5 votes |
package com.exasol.spark

import java.sql.Timestamp

import org.apache.spark.sql.functions.col

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.scalatest.funsuite.AnyFunSuite

class PredicatePushdownSuite extends AnyFunSuite with BaseDockerSuite with DataFrameSuiteBase {

  test("with where clause built from filters: filter") {
    createDummyTable()

    import spark.implicits._
    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()
      .filter($"id" < 3)
      .filter(col("city").like("Ber%"))
      .select("id", "city")

    val result = df.collect().map(x => (x.getLong(0), x.getString(1))).toSet
    assert(result.size === 1)
    assert(result === Set((1, "Berlin")))
  }

  test("with where clause built from filters: createTempView and spark.sql") {
    createDummyTable()

    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()

    df.createOrReplaceTempView("myTable")

    val myDF = spark
      .sql("SELECT id, city FROM myTable WHERE id BETWEEN 1 AND 3 AND name < 'Japan'")

    val result = myDF.collect().map(x => (x.getLong(0), x.getString(1))).toSet
    assert(result.size === 2)
    assert(result === Set((1, "Berlin"), (2, "Paris")))
  }

  test("date and timestamp should be read and filtered correctly") {
    import java.sql.Date
    createDummyTable()

    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT date_info, updated_at FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()

    val minTimestamp = Timestamp.valueOf("2017-12-30 00:00:00.0000")
    val testDate = Date.valueOf("2017-12-31")

    val resultDate = df.collect().map(_.getDate(0))
    assert(resultDate.contains(testDate))

    val resultTimestamp = df.collect().map(_.getTimestamp(1)).map(x => x.after(minTimestamp))
    assert(!resultTimestamp.contains(false))

    val filteredByDateDF = df.filter(col("date_info") === testDate)
    assert(filteredByDateDF.count() === 1)

    val filteredByTimestampDF = df.filter(col("updated_at") < minTimestamp)
    assert(filteredByTimestampDF.count() === 0)
  }

  test("count should be performed successfully") {
    createDummyTable()
    val df = spark.read
      .format("exasol")
      .option("host", container.host)
      .option("port", s"${container.port}")
      .option("query", s"SELECT * FROM $EXA_SCHEMA.$EXA_TABLE")
      .load()
    val result = df.count()
    assert(result === 3)
  }
}
Example 31
Source File: SnowflakeWriter.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake

import java.sql.{Date, Timestamp}

import net.snowflake.client.jdbc.internal.apache.commons.codec.binary.Base64
import net.snowflake.spark.snowflake.Parameters.MergedParameters
import net.snowflake.spark.snowflake.io.SupportedFormat
import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql._

  private def removeUselessColumns(dataFrame: DataFrame, params: MergedParameters): DataFrame =
    params.columnMap match {
      case Some(map) =>
        // Enclose column name with backtick(`) if dot(.) exists in column name
        val names = map.keys.toSeq.map(name =>
          if (name.contains(".")) {
            s"`$name`"
          } else {
            name
          })
        try {
          dataFrame.select(names.head, names.tail: _*)
        } catch {
          case e: AnalysisException =>
            throw new IllegalArgumentException(
              "Incorrect column name when column mapping: " + e.toString
            )
        }
      case _ => dataFrame
    }

  // Prepare a set of conversion functions, based on the schema
  def genConversionFunctions(schema: StructType): Array[Any => Any] =
    schema.fields.map { field =>
      field.dataType match {
        case DateType =>
          (v: Any) =>
            v match {
              case null         => ""
              case t: Timestamp => Conversions.formatTimestamp(t)
              case d: Date      => Conversions.formatDate(d)
            }
        case TimestampType =>
          (v: Any) => {
            if (v == null) "" else Conversions.formatTimestamp(v.asInstanceOf[Timestamp])
          }
        case StringType =>
          (v: Any) => {
            if (v == null) "" else Conversions.formatString(v.asInstanceOf[String])
          }
        case BinaryType =>
          (v: Any) =>
            v match {
              case null               => ""
              case bytes: Array[Byte] => Base64.encodeBase64String(bytes)
            }
        case _ => (v: Any) => Conversions.formatAny(v)
      }
    }
}

object DefaultSnowflakeWriter extends SnowflakeWriter(DefaultJDBCWrapper)
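The converter array pairs each StructField with a formatting closure. A stripped-down, standalone sketch of the same pattern (no Snowflake Conversions helper; plain toString formatting is my simplification, not the library's actual escaping rules):

import java.sql.Date
import org.apache.spark.sql.types._

def converters(schema: StructType): Array[Any => String] =
  schema.fields.map { f =>
    f.dataType match {
      // java.sql.Date.toString already yields the yyyy-MM-dd form
      case DateType => (v: Any) => if (v == null) "" else v.asInstanceOf[Date].toString
      case _        => (v: Any) => if (v == null) "" else v.toString
    }
  }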
Example 32
Source File: Transaction.scala From Scala-Programming-Projects with MIT License | 5 votes |
package coinyser

import java.sql.{Date, Timestamp}
import java.time.ZoneOffset

case class Transaction(timestamp: Timestamp,
                       date: Date,
                       tid: Int,
                       price: Double,
                       sell: Boolean,
                       amount: Double)

object Transaction {
  def apply(timestamp: Timestamp,
            tid: Int,
            price: Double,
            sell: Boolean,
            amount: Double) =
    new Transaction(
      timestamp = timestamp,
      date = Date.valueOf(
        timestamp.toInstant.atOffset(ZoneOffset.UTC).toLocalDate),
      tid = tid,
      price = price,
      sell = sell,
      amount = amount)
}
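Usage sketch (values invented): the five-argument apply derives the date column from the timestamp, so callers never pass a Date explicitly.

import java.sql.Timestamp

val tx = Transaction(Timestamp.valueOf("2018-09-10 12:30:00"), tid = 1,
  price = 6400.0, sell = false, amount = 0.5)
// tx.date is the UTC calendar day of the timestamp. Note that
// Timestamp.valueOf parses in the JVM's local zone, so near midnight
// the derived UTC date can differ from the local date.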
Example 33
Source File: LiteralGenerator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary }
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)

  // Sometimes, it would be quite expensive when unlimited value is used,
  // for example, the `times` arguments for StringRepeat would hang the test 'forever'
  // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited
  // range is more reasonable
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
}
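One detail worth noticing: dateLiteralGen feeds an arbitrary Int into new Date(millis), so the generated dates cluster within roughly plus or minus 24.8 days of the epoch (the Int range read as milliseconds). Sampling it is plain ScalaCheck:

val sampled = LiteralGenerator.randomGen(org.apache.spark.sql.types.DateType).sample
// sampled: Option[Literal] holding a DateType literal near 1970-01-01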
Example 34
Source File: Condition.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow

import java.sql.Date

trait Condition[T <: Execution] {
  def matches(pg: T): Boolean;
}

class AndCondition[T <: Execution](con1: Condition[T], con2: Condition[T]) extends Condition[T] {
  override def matches(pg: T): Boolean = {
    con1.matches(pg) && con2.matches(pg);
  }
}

class OrCondition[T <: Execution](con1: Condition[T], con2: Condition[T]) extends Condition[T] {
  override def matches(pg: T): Boolean = {
    con1.matches(pg) || con2.matches(pg);
  }
}

trait ComposableCondition[T <: Execution] extends Condition[T] {
  def and(others: Condition[T]*): ComposableCondition[T] = {
    new ComposableCondition[T]() {
      override def matches(pg: T): Boolean = {
        (this +: others).reduce((x, y) => new AndCondition(x, y)).matches(pg);
      }
    }
  }

  def or(others: Condition[T]*): ComposableCondition[T] = {
    new ComposableCondition[T]() {
      override def matches(pg: T): Boolean = {
        (this +: others).reduce((x, y) => new OrCondition(x, y)).matches(pg);
      }
    }
  }
}

object Condition {
  def AlwaysTrue[T <: Execution]() = new Condition[T]() {
    def matches(pg: T): Boolean = true;
  }

  def after[T <: Execution](processName: String, otherProcessNames: String*) =
    new ComposableCondition[T] {
      def matches(pg: T): Boolean = {
        val processNames = processName +: otherProcessNames;
        return processNames.map(pg.isEntryCompleted(_))
          .filter(_ == true).length == processNames.length;
      }
    }

  def after[T <: Execution](when: Date) = new ComposableCondition[T] {
    def matches(pg: T): Boolean = {
      return new Date(System.currentTimeMillis()).after(when);
    }
  }
}
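A composition sketch (MyExecution stands in for a concrete Execution subtype and is hypothetical):

val ready = Condition.after[MyExecution]("loadStep", "cleanStep")
  .and(Condition.after[MyExecution](java.sql.Date.valueOf("2020-01-01")))
// ready.matches(exec) holds once both steps have completed and the wall
// clock has passed 2020-01-01; the Date overload compares against
// new Date(System.currentTimeMillis()).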
Example 35
Source File: FilterPushdown.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._

  private def getTypeForAttribute(schema: StructType, attribute: String): Option[DataType] = {
    if (schema.fieldNames.contains(attribute)) {
      Some(schema(attribute).dataType)
    } else {
      None
    }
  }

  def queryFromSchema(schema: StructType, filters: Array[Filter]): String = {
    var columnList = schema.fields.map(x => s"s." + s""""${x.name}"""").mkString(",")
    if (columnList.length == 0) {
      columnList = "*"
    }
    val whereClause = buildWhereClause(schema, filters)
    if (whereClause.length == 0) {
      s"select $columnList from S3Object s"
    } else {
      s"select $columnList from S3Object s $whereClause"
    }
  }
}
Example 36
Source File: TypeCast.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1) match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
}
Example 37
Source File: LiteralGenerator.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}

import org.scalacheck.{Arbitrary, Gen}

import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

object LiteralGenerator {

  lazy val byteLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbByte.arbitrary } yield Literal.create(b, ByteType)

  lazy val shortLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbShort.arbitrary } yield Literal.create(s, ShortType)

  lazy val integerLiteralGen: Gen[Literal] =
    for { i <- Arbitrary.arbInt.arbitrary } yield Literal.create(i, IntegerType)

  lazy val longLiteralGen: Gen[Literal] =
    for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType)

  lazy val floatLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2,
        Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity)
    } yield Literal.create(f, FloatType)

  lazy val doubleLiteralGen: Gen[Literal] =
    for {
      f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2,
        Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)
    } yield Literal.create(f, DoubleType)

  // TODO cache the generated data
  def decimalLiteralGen(precision: Int, scale: Int): Gen[Literal] = {
    assert(scale >= 0)
    assert(precision >= scale)
    Arbitrary.arbBigInt.arbitrary.map { s =>
      val a = (s % BigInt(10).pow(precision - scale)).toString()
      val b = (s % BigInt(10).pow(scale)).abs.toString()
      Literal.create(
        Decimal(BigDecimal(s"$a.$b"), precision, scale),
        DecimalType(precision, scale))
    }
  }

  lazy val stringLiteralGen: Gen[Literal] =
    for { s <- Arbitrary.arbString.arbitrary } yield Literal.create(s, StringType)

  lazy val binaryLiteralGen: Gen[Literal] =
    for { ab <- Gen.listOf[Byte](Arbitrary.arbByte.arbitrary) }
      yield Literal.create(ab.toArray, BinaryType)

  lazy val booleanLiteralGen: Gen[Literal] =
    for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)

  lazy val dateLiteralGen: Gen[Literal] =
    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)

  lazy val timestampLiteralGen: Gen[Literal] =
    for { t <- Arbitrary.arbLong.arbitrary } yield Literal.create(new Timestamp(t), TimestampType)

  lazy val calendarIntervalLiterGen: Gen[Literal] =
    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary }
      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)

  // Sometimes, it would be quite expensive when unlimited value is used,
  // for example, the `times` arguments for StringRepeat would hang the test 'forever'
  // if it's tested against Int.MaxValue by ScalaCheck, therefore, use values from a limited
  // range is more reasonable
  lazy val limitedIntegerLiteralGen: Gen[Literal] =
    for { i <- Gen.choose(-100, 100) } yield Literal.create(i, IntegerType)

  def randomGen(dt: DataType): Gen[Literal] = {
    dt match {
      case ByteType => byteLiteralGen
      case ShortType => shortLiteralGen
      case IntegerType => integerLiteralGen
      case LongType => longLiteralGen
      case DoubleType => doubleLiteralGen
      case FloatType => floatLiteralGen
      case DateType => dateLiteralGen
      case TimestampType => timestampLiteralGen
      case BooleanType => booleanLiteralGen
      case StringType => stringLiteralGen
      case BinaryType => binaryLiteralGen
      case CalendarIntervalType => calendarIntervalLiterGen
      case DecimalType.Fixed(precision, scale) => decimalLiteralGen(precision, scale)
      case dt => throw new IllegalArgumentException(s"not supported type $dt")
    }
  }
}
Example 38
Source File: SortOrderExpressionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import java.sql.{Date, Timestamp}
import java.util.TimeZone

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._

class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

  test("SortPrefix") {
    val b1 = Literal.create(false, BooleanType)
    val b2 = Literal.create(true, BooleanType)
    val i1 = Literal.create(20132983, IntegerType)
    val i2 = Literal.create(-20132983, IntegerType)
    val l1 = Literal.create(20132983, LongType)
    val l2 = Literal.create(-20132983, LongType)
    val millis = 1524954911000L
    // Explicitly choose a time zone, since Date objects can create different values depending on
    // local time zone of the machine on which the test is running
    val oldDefaultTZ = TimeZone.getDefault
    val d1 = try {
      TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
      Literal.create(new java.sql.Date(millis), DateType)
    } finally {
      TimeZone.setDefault(oldDefaultTZ)
    }
    val t1 = Literal.create(new Timestamp(millis), TimestampType)
    val f1 = Literal.create(0.7788229f, FloatType)
    val f2 = Literal.create(-0.7788229f, FloatType)
    val db1 = Literal.create(0.7788229d, DoubleType)
    val db2 = Literal.create(-0.7788229d, DoubleType)
    val s1 = Literal.create("T", StringType)
    val s2 = Literal.create("This is longer than 8 characters", StringType)
    val bin1 = Literal.create(Array[Byte](12), BinaryType)
    val bin2 = Literal.create(
      Array[Byte](12, 17, 99, 0, 0, 0, 2, 3, 0xf4.asInstanceOf[Byte]), BinaryType)
    val dec1 = Literal(Decimal(20132983L, 10, 2))
    val dec2 = Literal(Decimal(20132983L, 19, 2))
    val dec3 = Literal(Decimal(20132983L, 21, 2))
    val list1 = Literal(List(1, 2), ArrayType(IntegerType))
    val nullVal = Literal.create(null, IntegerType)

    checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L)
    checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L)
    checkEvaluation(SortPrefix(SortOrder(i1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(i2, Ascending)), -20132983L)
    checkEvaluation(SortPrefix(SortOrder(l1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(l2, Ascending)), -20132983L)
    // For some reason, the Literal.create code gives us the number of days since the epoch
    checkEvaluation(SortPrefix(SortOrder(d1, Ascending)), 17649L)
    checkEvaluation(SortPrefix(SortOrder(t1, Ascending)), millis * 1000)
    checkEvaluation(SortPrefix(SortOrder(f1, Ascending)),
      DoublePrefixComparator.computePrefix(f1.value.asInstanceOf[Float].toDouble))
    checkEvaluation(SortPrefix(SortOrder(f2, Ascending)),
      DoublePrefixComparator.computePrefix(f2.value.asInstanceOf[Float].toDouble))
    checkEvaluation(SortPrefix(SortOrder(db1, Ascending)),
      DoublePrefixComparator.computePrefix(db1.value.asInstanceOf[Double]))
    checkEvaluation(SortPrefix(SortOrder(db2, Ascending)),
      DoublePrefixComparator.computePrefix(db2.value.asInstanceOf[Double]))
    checkEvaluation(SortPrefix(SortOrder(s1, Ascending)),
      StringPrefixComparator.computePrefix(s1.value.asInstanceOf[UTF8String]))
    checkEvaluation(SortPrefix(SortOrder(s2, Ascending)),
      StringPrefixComparator.computePrefix(s2.value.asInstanceOf[UTF8String]))
    checkEvaluation(SortPrefix(SortOrder(bin1, Ascending)),
      BinaryPrefixComparator.computePrefix(bin1.value.asInstanceOf[Array[Byte]]))
    checkEvaluation(SortPrefix(SortOrder(bin2, Ascending)),
      BinaryPrefixComparator.computePrefix(bin2.value.asInstanceOf[Array[Byte]]))
    checkEvaluation(SortPrefix(SortOrder(dec1, Ascending)), 20132983L)
    checkEvaluation(SortPrefix(SortOrder(dec2, Ascending)), 2013298L)
    checkEvaluation(SortPrefix(SortOrder(dec3, Ascending)),
      DoublePrefixComparator.computePrefix(201329.83d))
    checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L)
    checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null)
  }
}
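The 17649L expectation can be reproduced outside the suite (plain JDK; my check, not part of the test):

import java.time.{Instant, ZoneId}

object DatePrefixCheck extends App {
  val instant = Instant.ofEpochMilli(1524954911000L) // 2018-04-28T22:35:11Z
  val laDate = instant.atZone(ZoneId.of("America/Los_Angeles")).toLocalDate
  println(laDate.toEpochDay) // 17649, the expected sort prefix
}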
Example 39
Source File: NodeType.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.types

import java.sql.Date

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

class NodeType extends UserDefinedType[Node] {

  override val sqlType = StructType(Seq(
    StructField("path", ArrayType(StringType, containsNull = false), nullable = false),
    StructField("dataType", StringType, nullable = false),
    StructField("preRank", IntegerType, nullable = true),
    StructField("postRank", IntegerType, nullable = true),
    StructField("isLeaf", BooleanType, nullable = true),
    StructField("ordPath", ArrayType(LongType, containsNull = false), nullable = true)
  ))

  override def serialize(obj: Any): Any = obj match {
    case node: Node =>
      InternalRow(new GenericArrayData(node.path.map {
        case null => null
        case p => UTF8String.fromString(p.toString)
      }),
        UTF8String.fromString(node.pathDataTypeJson),
        node.preRank,
        node.postRank,
        node.isLeaf,
        if (node.ordPath == null) {
          node.ordPath
        } else {
          new GenericArrayData(node.ordPath)
        })
    case _ => throw new UnsupportedOperationException(s"Cannot serialize ${obj.getClass}")
  }

  // scalastyle:off cyclomatic.complexity
  override def deserialize(datum: Any): Node = datum match {
    case row: InternalRow => {
      val stringArray = row.getArray(0).toArray[UTF8String](StringType).map {
        case null => null
        case somethingElse => somethingElse.toString
      }
      val readDataTypeString: String = row.getString(1)
      val readDataType: DataType = DataType.fromJson(readDataTypeString)
      val path: Seq[Any] = readDataType match {
        case StringType => stringArray
        case LongType => stringArray.map(v => if (v != null) v.toLong else null)
        case IntegerType => stringArray.map(v => if (v != null) v.toInt else null)
        case DoubleType => stringArray.map(v => if (v != null) v.toDouble else null)
        case FloatType => stringArray.map(v => if (v != null) v.toFloat else null)
        case ByteType => stringArray.map(v => if (v != null) v.toByte else null)
        case BooleanType => stringArray.map(v => if (v != null) v.toBoolean else null)
        case TimestampType => stringArray.map(v => if (v != null) v.toLong else null)
        case dt: DataType => sys.error(s"Type $dt not supported for hierarchy path")
      }
      val preRank: Integer = if (row.isNullAt(2)) null else row.getInt(2)
      val postRank: Integer = if (row.isNullAt(3)) null else row.getInt(3)
      // scalastyle:off magic.number
      val isLeaf: java.lang.Boolean = if (row.isNullAt(4)) null else row.getBoolean(4)
      val ordPath: Seq[Long] = if (row.isNullAt(5)) null else row.getArray(5).toLongArray()
      // scalastyle:on magic.number
      Node(
        path,
        readDataTypeString,
        preRank,
        postRank,
        isLeaf,
        ordPath
      )
    }
    case node: Node => node
    case _ => throw new UnsupportedOperationException(s"Cannot deserialize ${datum.getClass}")
  }
  // scalastyle:on

  override def userClass: java.lang.Class[Node] = classOf[Node]
}

case object NodeType extends NodeType
Example 40
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0 | 5 votes |
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
    path: String,
    dataSchema: StructType,
    context: TaskAttemptContext,
    options: Map[String, String]) extends OutputWriter {

  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD =
          new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      var currentColumnNum = 0;
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length == 5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue, comment, formula, address, sheetName)
        } else {
          simpleObject(i) = obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow =
        simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x <- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }
}
Example 41
Source File: SpecificPrimitivesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._

import org.specs2.mutable.Specification

import java.sql.{Date, Timestamp}
import java.util.UUID

class SpecificPrimitivesSpec extends Specification {

  "A case class with an `Int` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest00(1)
      val record2 = AvroTypeProviderTest00(2)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `Float` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest01(1F)
      val record2 = AvroTypeProviderTest01(2F)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `Long` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest02(1L)
      val record2 = AvroTypeProviderTest02(2L)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `Double` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest03(1D)
      val record2 = AvroTypeProviderTest03(2D)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `Boolean` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest04(true)
      val record2 = AvroTypeProviderTest04(false)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `String` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest05("hello world")
      val record2 = AvroTypeProviderTest05("hello galaxy")
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with a `Null` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest06(null)
      val record2 = AvroTypeProviderTest06(null)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with an `Array[Bytes]` field" should {
    "deserialize correctly" in {
      val record1 = AvroTypeProviderTest69("hello world".getBytes)
      val record2 = AvroTypeProviderTest69("hello galaxy".getBytes)
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }

  "A case class with `logicalType` fields from .avsc" should {
    "deserialize correctly" in {
      val t1 = System.currentTimeMillis()
      val t2 = System.currentTimeMillis()
      val record1 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t1), UUID.randomUUID())
      val record2 = LogicalSc(BigDecimal(10.0).setScale(8), new Timestamp(Long.MaxValue), new Date(t2), UUID.randomUUID())
      val records = List(record1, record2)
      SpecificTestUtil.verifyWriteAndRead(records)
    }
  }
}
Example 42
Source File: UtilsTest.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
import java.sql.Date

import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.SparkSession
import org.junit.Assert
import org.junit.Test
import java.io.ByteArrayOutputStream
import java.io.InputStream
import org.apache.commons.io.IOUtils
import com.esotericsoftware.kryo.io.Input
import org.apache.spark.sql.execution.streaming.http.KryoSerializerUtils

class UtilsTest {
  @Test
  def testKryoSerDe() {
    val d1 = new Date(30000);
    val bytes = KryoSerializerUtils.serialize(d1);
    val d2 = KryoSerializerUtils.deserialize(bytes);
    Assert.assertEquals(d1, d2);

    val d3 = Map('x' -> Array("aaa", "bbb"), 'y' -> Array("ccc", "ddd"));
    println(d3);
    val bytes2 = KryoSerializerUtils.serialize(d3);
    val d4 = KryoSerializerUtils.deserialize(bytes2).asInstanceOf[Map[String, Any]];
    println(d4);
  }

  @Test
  def testEncoderSchema() {
    val spark = SparkSession.builder.master("local[4]")
      .getOrCreate();
    val sqlContext = spark.sqlContext;
    import sqlContext.implicits._
    import org.apache.spark.sql.catalyst.encoders.encoderFor
    val schema1 = encoderFor[String].schema;
    val schema2 = encoderFor[(String)].schema;
    val schema3 = encoderFor[((String))].schema;
    Assert.assertEquals(schema1, schema2);
    Assert.assertEquals(schema1, schema3);
  }

  @Test
  def testDateInTuple() {
    val spark = SparkSession.builder.master("local[4]")
      .getOrCreate();
    val sqlContext = spark.sqlContext;
    import sqlContext.implicits._

    val d1 = new Date(30000);
    val ds = sqlContext.createDataset(Seq[(Int, Date)]((1, d1)));
    val d2 = ds.collect()(0)._2;

    // NOTE: d1 != d2, maybe a bug
    println(d1.equals(d2));
  }
}
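A likely explanation for the d1 != d2 note above (my reading, not stated in the source): Catalyst stores DateType as whole days since the epoch, so the 30-second time-of-day component of new Date(30000) is dropped on the round-trip. The conversion can be reproduced directly (the DateTimeUtils API location varies slightly across Spark versions):

import java.sql.Date
import org.apache.spark.sql.catalyst.util.DateTimeUtils

val d1 = new Date(30000)
val roundTripped = DateTimeUtils.toJavaDate(DateTimeUtils.fromJavaDate(d1))
// roundTripped is normalized to the start of the day, so d1 != roundTripped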
Example 43
Source File: CubeWriterHelper.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.writer

import java.sql.{Date, Timestamp}

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.driver.factory.SparkContextFactory
import com.stratio.sparta.driver.step.Cube
import com.stratio.sparta.sdk.pipeline.aggregation.cube.{DimensionValue, DimensionValuesTime, MeasuresValues}
import com.stratio.sparta.sdk.pipeline.output.Output
import com.stratio.sparta.sdk.pipeline.schema.TypeOp
import org.apache.spark.sql._
import org.apache.spark.streaming.dstream.DStream

object CubeWriterHelper extends SLF4JLogging {

  def writeCube(cube: Cube, outputs: Seq[Output], stream: DStream[(DimensionValuesTime, MeasuresValues)]): Unit = {
    stream.map { case (dimensionValuesTime, measuresValues) =>
      toRow(cube, dimensionValuesTime, measuresValues)
    }.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        val sparkSession = SparkContextFactory.sparkSessionInstance
        val cubeDf = sparkSession.createDataFrame(rdd, cube.schema)
        val extraOptions = Map(Output.TableNameKey -> cube.name)
        val cubeAutoCalculatedFieldsDf = WriterHelper.write(cubeDf, cube.writerOptions, extraOptions, outputs)

        TriggerWriterHelper.writeTriggers(cubeAutoCalculatedFieldsDf, cube.triggers, cube.name, outputs)
      } else log.debug("Empty event received")
    })
  }

  private[driver] def toRow(cube: Cube, dimensionValuesT: DimensionValuesTime, measures: MeasuresValues): Row = {
    val measuresSorted = measuresValuesSorted(measures.values)
    val rowValues = dimensionValuesT.timeConfig match {
      case None =>
        val dimensionValues = dimensionsValuesSorted(dimensionValuesT.dimensionValues)
        dimensionValues ++ measuresSorted
      case Some(timeConfig) =>
        val timeValue = Seq(timeFromDateType(timeConfig.eventTime, cube.dateType))
        val dimFilteredByTime = filterDimensionsByTime(dimensionValuesT.dimensionValues, timeConfig.timeDimension)
        val dimensionValues = dimensionsValuesSorted(dimFilteredByTime) ++ timeValue
        val measuresValuesWithTime = measuresSorted

        dimensionValues ++ measuresValuesWithTime
    }

    Row.fromSeq(rowValues)
  }

  private[driver] def dimensionsValuesSorted(dimensionValues: Seq[DimensionValue]): Seq[Any] =
    dimensionValues.sorted.map(dimVal => dimVal.value)

  private[driver] def measuresValuesSorted(measures: Map[String, Option[Any]]): Seq[Any] =
    measures.toSeq.sortWith(_._1 < _._1).map(measure => measure._2.getOrElse(null))

  private[driver] def filterDimensionsByTime(dimensionValues: Seq[DimensionValue],
                                             timeDimension: String): Seq[DimensionValue] =
    dimensionValues.filter(dimensionValue => dimensionValue.dimension.name != timeDimension)

  private[driver] def timeFromDateType(time: Long, dateType: TypeOp.Value): Any = {
    dateType match {
      case TypeOp.Date | TypeOp.DateTime => new Date(time)
      case TypeOp.Long => time
      case TypeOp.Timestamp => new Timestamp(time)
      case _ => time.toString
    }
  }
}
Example 44
Source File: RDBDataTypeConverter.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.jdbc.datatype

import java.sql.{Date, ResultSet, Time, Timestamp}
import java.util.Optional

import oharastream.ohara.client.configurator.InspectApi.RdbColumn
import oharastream.ohara.connector.jdbc.util.DateTimeUtils

trait RDBDataTypeConverter {
  def converterValue(resultSet: ResultSet, column: RdbColumn): Any = {
    val columnName = column.name
    val typeName = column.dataType.toUpperCase
    val dataType: DataTypeEnum = converterDataType(column)
    dataType match {
      case DataTypeEnum.INTEGER =>
        java.lang.Integer.valueOf(resultSet.getInt(columnName))
      case DataTypeEnum.LONG =>
        java.lang.Long.valueOf(resultSet.getLong(columnName))
      case DataTypeEnum.BOOLEAN =>
        java.lang.Boolean.valueOf(resultSet.getBoolean(columnName))
      case DataTypeEnum.FLOAT =>
        java.lang.Float.valueOf(resultSet.getFloat(columnName))
      case DataTypeEnum.DOUBLE =>
        java.lang.Double.valueOf(resultSet.getDouble(columnName))
      case DataTypeEnum.BIGDECIMAL =>
        Optional.ofNullable(resultSet.getBigDecimal(columnName)).orElseGet(() => new java.math.BigDecimal(0L))
      case DataTypeEnum.STRING =>
        Optional.ofNullable(resultSet.getString(columnName)).orElseGet(() => "null")
      case DataTypeEnum.DATE =>
        Optional.ofNullable(resultSet.getDate(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Date(0))
      case DataTypeEnum.TIME =>
        Optional.ofNullable(resultSet.getTime(columnName, DateTimeUtils.CALENDAR)).orElseGet(() => new Time(0))
      case DataTypeEnum.TIMESTAMP =>
        Optional
          .ofNullable(resultSet.getTimestamp(columnName, DateTimeUtils.CALENDAR))
          .orElseGet(() => new Timestamp(0))
      case DataTypeEnum.BYTES =>
        Optional.ofNullable(resultSet.getBytes(columnName)).orElseGet(() => Array())
      case _ =>
        throw new UnsupportedOperationException(
          s"JDBC Source Connector does not support the ${typeName} data type in column ${columnName} for the ${dataBaseProductName} implementation."
        )
    }
  }

  protected[datatype] def dataBaseProductName: String

  protected[datatype] def converterDataType(column: RdbColumn): DataTypeEnum
}
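Note the null-handling convention: a SQL NULL DATE surfaces as the epoch date (new Date(0)) rather than as null. The same fallback in isolation (plain JDK, Scala 2.12 SAM syntax):

import java.sql.Date
import java.util.Optional

val noDate: Date = null
val fallback = Optional.ofNullable(noDate).orElseGet(() => new Date(0))
println(fallback) // 1970-01-01 in the JVM's default time zone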
Example 45
Source File: JdbcIT.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.test

import java.sql.Date

import org.apache.livy.test.framework.BaseThriftIntegrationTestSuite

class JdbcIT extends BaseThriftIntegrationTestSuite {
  test("basic JDBC test") {
    withConnection { c =>
      checkQuery(
        c, "select 1, 'a', cast(null as int), 1.2345, CAST('2018-08-06' as date)") { resultSet =>
        resultSet.next()
        assert(resultSet.getInt(1) == 1)
        assert(resultSet.getString(2) == "a")
        assert(resultSet.getInt(3) == 0)
        assert(resultSet.wasNull())
        assert(resultSet.getDouble(4) == 1.2345)
        assert(resultSet.getDate(5) == Date.valueOf("2018-08-06"))
        assert(!resultSet.next())
      }

      checkQuery(
        c, "select cast(null as string), cast(null as decimal), cast(null as double), " +
          "cast(null as date), null") { resultSetWithNulls =>
        resultSetWithNulls.next()
        assert(resultSetWithNulls.getString(1) == null)
        assert(resultSetWithNulls.wasNull())
        assert(resultSetWithNulls.getBigDecimal(2) == null)
        assert(resultSetWithNulls.wasNull())
        assert(resultSetWithNulls.getDouble(3) == 0.0)
        assert(resultSetWithNulls.wasNull())
        assert(resultSetWithNulls.getDate(4) == null)
        assert(resultSetWithNulls.wasNull())
        assert(resultSetWithNulls.getString(5) == null)
        assert(resultSetWithNulls.wasNull())
        assert(!resultSetWithNulls.next())
      }

      checkQuery(
        c, "select array(1.5, 2.4, 1.3), struct('a', 1, 1.5), map(1, 'a', 2, 'b')") { resultSet =>
        resultSet.next()
        assert(resultSet.getString(1) == "[1.5,2.4,1.3]")
        assert(resultSet.getString(2) == "{\"col1\":\"a\",\"col2\":1,\"col3\":1.5}")
        assert(resultSet.getString(3) == "{1:\"a\",2:\"b\"}")
        assert(!resultSet.next())
      }
    }
  }
}
Example 46
Source File: SQLInterpreter.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.repl

import java.lang.reflect.InvocationTargetException
import java.sql.Date

import scala.util.control.NonFatal

import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession
import org.json4s._
import org.json4s.JsonAST.{JNull, JString}
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.livy.Logging
import org.apache.livy.rsc.RSCConf
import org.apache.livy.rsc.driver.SparkEntries

class SQLInterpreter(
    sparkConf: SparkConf,
    rscConf: RSCConf,
    sparkEntries: SparkEntries) extends Interpreter with Logging {

  case object DateSerializer extends CustomSerializer[Date](_ => ( {
    case JString(s) => Date.valueOf(s)
    case JNull => null
  }, {
    case d: Date => JString(d.toString)
  }))

  private implicit def formats: Formats = DefaultFormats + DateSerializer

  private var spark: SparkSession = null

  private val maxResult = rscConf.getInt(RSCConf.Entry.SQL_NUM_ROWS)

  override def kind: String = "sql"

  override def start(): Unit = {
    require(!sparkEntries.sc().sc.isStopped)
    spark = sparkEntries.sparkSession()
  }

  override protected[repl] def execute(code: String): Interpreter.ExecuteResponse = {
    try {
      val result = spark.sql(code)
      val schema = parse(result.schema.json)

      // Get the row data
      val rows = result.take(maxResult)
        .map {
          _.toSeq.map {
            // Convert java BigDecimal type to Scala BigDecimal, because current version of
            // Json4s doesn't support java BigDecimal as a primitive type (LIVY-455).
            case i: java.math.BigDecimal => BigDecimal(i)
            case e => e
          }
        }
      val jRows = Extraction.decompose(rows)

      Interpreter.ExecuteSuccess(
        APPLICATION_JSON -> (("schema" -> schema) ~ ("data" -> jRows)))
    } catch {
      case e: InvocationTargetException =>
        warn(s"Fail to execute query $code", e.getTargetException)
        val cause = e.getTargetException
        Interpreter.ExecuteError("Error", cause.getMessage, cause.getStackTrace.map(_.toString))
      case NonFatal(f) =>
        warn(s"Fail to execute query $code", f)
        Interpreter.ExecuteError("Error", f.getMessage, f.getStackTrace.map(_.toString))
    }
  }

  override def close(): Unit = { }
}
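What the custom DateSerializer buys: java.sql.Date round-trips through JSON as its ISO string form. A sketch, assuming the serializer is brought into a Formats instance the way the class above does:

import java.sql.Date
import org.json4s._

implicit val fmts: Formats = DefaultFormats + DateSerializer
val json = Extraction.decompose(Date.valueOf("2018-08-06")) // JString("2018-08-06")
val back = json.extract[Date]                               // Date.valueOf("2018-08-06")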
Example 47
Source File: MergeProjection.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.mutation.merge

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.{CarbonDatasourceHadoopRelation, Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, GenericRowWithSchema, InterpretedMutableProjection, Projection}
import org.apache.spark.sql.catalyst.util.DateTimeUtils

case class MergeProjection(
    @transient tableCols: Seq[String],
    @transient statusCol: String,
    @transient ds: Dataset[Row],
    @transient rltn: CarbonDatasourceHadoopRelation,
    @transient sparkSession: SparkSession,
    @transient mergeAction: MergeAction) {

  private val cutOffDate = Integer.MAX_VALUE >> 1

  val isUpdate = mergeAction.isInstanceOf[UpdateAction]
  val isDelete = mergeAction.isInstanceOf[DeleteAction]

  def apply(row: GenericRowWithSchema): InternalRow = {
    // TODO we can avoid these multiple conversions if this is added as a SparkPlan node.
    val values = row.values.map {
      case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s)
      case d: java.math.BigDecimal => org.apache.spark.sql.types.Decimal.apply(d)
      case b: Array[Byte] => org.apache.spark.unsafe.types.UTF8String.fromBytes(b)
      case d: Date => DateTimeUtils.fromJavaDate(d)
      case t: Timestamp => DateTimeUtils.fromJavaTimestamp(t)
      case value => value
    }

    projection(new GenericInternalRow(values)).asInstanceOf[GenericInternalRow]
  }

  val (projection, output) = generateProjection

  private def generateProjection: (Projection, Array[Expression]) = {
    val existingDsOutput = rltn.carbonRelation.schema.toAttributes
    val colsMap = mergeAction match {
      case UpdateAction(updateMap) => updateMap
      case InsertAction(insertMap) => insertMap
      case _ => null
    }
    if (colsMap != null) {
      val output = new Array[Expression](tableCols.length)
      val expecOutput = new Array[Expression](tableCols.length)
      colsMap.foreach { case (k, v) =>
        val tableIndex = tableCols.indexOf(k.toString().toLowerCase)
        if (tableIndex < 0) {
          throw new CarbonMergeDataSetException(s"Mapping is wrong $colsMap")
        }
        output(tableIndex) = v.expr.transform {
          case a: Attribute if !a.resolved =>
            ds.queryExecution.analyzed.resolveQuoted(a.name,
              sparkSession.sessionState.analyzer.resolver).get
        }
        expecOutput(tableIndex) =
          existingDsOutput.find(_.name.equalsIgnoreCase(tableCols(tableIndex))).get
      }
      if (output.contains(null)) {
        throw new CarbonMergeDataSetException(s"Not all columns are mapped")
      }
      (new InterpretedMutableProjection(output ++ Seq(
        ds.queryExecution.analyzed.resolveQuoted(statusCol,
          sparkSession.sessionState.analyzer.resolver).get),
        ds.queryExecution.analyzed.output), expecOutput)
    } else {
      (null, null)
    }
  }
}
Example 48
Source File: DateDataTypeNullDataTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary

import java.sql.Date

import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest

class DateDataTypeNullDataTest extends QueryTest with BeforeAndAfterAll {
  var hiveContext: HiveContext = _

  override def beforeAll {
    try {
      sql(
        """CREATE TABLE IF NOT EXISTS timestampTyeNullData
             (ID Int, dateField date, country String,
              name String, phonetype String, serialname String, salary Int)
            STORED AS carbondata"""
      )
      CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
      val csvFilePath = s"$resourcesPath/datasamplenull.csv"
      sql("LOAD DATA LOCAL INPATH '" + csvFilePath + "' INTO TABLE timestampTyeNullData").collect();
    } catch {
      case x: Throwable =>
        x.printStackTrace()
        CarbonProperties.getInstance()
          .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
            CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    }
  }

  test("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null") {
    checkAnswer(
      sql("SELECT max(dateField) FROM timestampTyeNullData where dateField is not null"),
      Seq(Row(Date.valueOf("2015-07-23")))
    )
  }

  test("SELECT * FROM timestampTyeNullData where dateField is null") {
    checkAnswer(
      sql("SELECT dateField FROM timestampTyeNullData where dateField is null"),
      Seq(Row(null)))
  }

  override def afterAll {
    sql("drop table timestampTyeNullData")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
        CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
  }
}
Example 49
Source File: DateDataTypeDirectDictionaryWithOffHeapSortDisabledTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary

import java.sql.Date

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

class DateDataTypeDirectDictionaryWithOffHeapSortDisabledTest
  extends QueryTest with BeforeAndAfterAll {

  private val originOffHeapSortStatus: String = CarbonProperties.getInstance()
    .getProperty(CarbonCommonConstants.ENABLE_OFFHEAP_SORT,
      CarbonCommonConstants.ENABLE_OFFHEAP_SORT_DEFAULT)

  override def beforeAll {
    try {
      CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
      CarbonProperties.getInstance().addProperty(
        CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
      CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_OFFHEAP_SORT, "false")

      sql("drop table if exists directDictionaryTable ")
      sql("CREATE TABLE if not exists directDictionaryTable (empno int,doj date, salary int) " +
        "STORED AS carbondata")

      CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd")
      val csvFilePath = s"$resourcesPath/datasamplefordate.csv"
      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" +
        "('DELIMITER'= ',', 'QUOTECHAR'= '\"')")
    } catch {
      case x: Throwable =>
        x.printStackTrace()
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
          CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    }
  }

  test("test direct dictionary for not null condition") {
    checkAnswer(sql("select doj from directDictionaryTable where doj is not null"),
      Seq(Row(Date.valueOf("2016-03-14")), Row(Date.valueOf("2016-04-14"))))
  }

  override def afterAll {
    sql("drop table directDictionaryTable")
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
      CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_OFFHEAP_SORT,
      originOffHeapSortStatus)
  }
}
Example 50
Source File: DateDataTypeDirectDictionaryWithNoDictTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.directdictionary

import java.io.File
import java.sql.Date

import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest

class DateDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with BeforeAndAfterAll {
  var hiveContext: HiveContext = _

  override def beforeAll {
    try {
      sql("drop table if exists directDictionaryTable")
      CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
      sql(
        """
         CREATE TABLE IF NOT EXISTS directDictionaryTable
         (empno String, doj Date, salary Int)
         STORED AS carbondata"""
      )

      CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy-MM-dd")
      val csvFilePath = s"$resourcesPath/datasample.csv"
      println(csvFilePath)
      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" +
        "('DELIMITER'= ',', 'QUOTECHAR'= '\"')");
    } catch {
      case x: Throwable =>
        x.printStackTrace()
        CarbonProperties.getInstance()
          .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
            CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    }
  }

  test("select doj from directDictionaryTable") {
    sql("select doj from directDictionaryTable")
    checkAnswer(
      sql("select doj from directDictionaryTable"),
      Seq(Row(Date.valueOf("2016-03-14")),
        Row(Date.valueOf("2016-04-14")),
        Row(null)
      )
    )
  }

  test("select doj from directDictionaryTable with equals filter") {
    sql("select doj from directDictionaryTable where doj='2016-03-14 15:00:09'")
    checkAnswer(
      sql("select doj from directDictionaryTable where doj='2016-03-14'"),
      Seq(Row(Date.valueOf("2016-03-14")))
    )
  }

  test("select doj from directDictionaryTable with greater than filter") {
    sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'")
    checkAnswer(
      sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'"),
      Seq(Row(Date.valueOf("2016-04-14")))
    )
  }

  override def afterAll {
    sql("drop table directDictionaryTable")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
        CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
    CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
  }
}
Example 51
Source File: ReservationDao.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.view

import java.sql.Date

import ecommerce.sales.ReservationStatus
import ReservationStatus.ReservationStatus
import pl.newicom.dddd.aggregate.EntityId
import slick.jdbc.meta.MTable._

import scala.concurrent.ExecutionContext
import slick.jdbc.JdbcProfile

class ReservationDao(implicit val profile: JdbcProfile, ec: ExecutionContext) {
  import profile.api._

  implicit val reservationStatusColumnType =
    MappedColumnType.base[ReservationStatus, String](
      { c => c.toString },
      { s => ReservationStatus.withName(s) }
    )

  val ReservationsTableName = "reservations"

  class Reservations(tag: Tag) extends Table[ReservationView](tag, ReservationsTableName) {
    def id = column[EntityId]("ID", O.PrimaryKey)
    def clientId = column[EntityId]("CLIENT_ID")
    def status = column[ReservationStatus]("STATUS")
    def createDate = column[Date]("CREATE_DATE")
    def * = (id, clientId, status, createDate) <> (ReservationView.tupled, ReservationView.unapply)
  }

  val reservations = TableQuery[Reservations]

  def createOrUpdate(view: ReservationView) = {
    reservations.insertOrUpdate(view)
  }

  def updateStatus(viewId: EntityId, status: ReservationStatus.Value) = {
    reservations.filter(_.id === viewId).map(_.status).update(status)
  }

  def all = reservations.result

  def byId(id: EntityId) = by_id(id).result.headOption

  def byClientId(clientId: EntityId) = by_client_id(clientId).result

  def remove(id: EntityId) = by_id(id).delete

  def ensureSchemaDropped =
    getTables(ReservationsTableName).headOption.flatMap {
      case Some(table) => reservations.schema.drop.map(_ => ())
      case None => DBIO.successful(())
    }

  def ensureSchemaCreated =
    getTables(ReservationsTableName).headOption.flatMap {
      case Some(table) => DBIO.successful(())
      case None => reservations.schema.create.map(_ => ())
    }
}
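The MappedColumnType.base call above is the generic Slick recipe for persisting an Enumeration as a string column, while the java.sql.Date column maps to SQL DATE out of the box. The same shape works for any enum-like type (hypothetical Color enum; the H2 profile is chosen only for illustration):

import slick.jdbc.H2Profile.api._

object Color extends Enumeration { val Red, Blue = Value }

implicit val colorColumnType =
  MappedColumnType.base[Color.Value, String](_.toString, Color.withName)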
Example 52
Source File: ReservationProjection.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.view

import java.sql.Date

import ecommerce.sales.ReservationStatus._
import ecommerce.sales._
import org.joda.time.DateTime.now
import pl.newicom.dddd.messaging.event.OfficeEventMessage
import pl.newicom.dddd.view.sql.Projection
import pl.newicom.dddd.view.sql.Projection.ProjectionAction
import slick.dbio.DBIOAction
import slick.dbio.Effect.Write

import scala.concurrent.ExecutionContext

class ReservationProjection(dao: ReservationDao)(implicit ec: ExecutionContext) extends Projection {

  override def consume(eventMessage: OfficeEventMessage): ProjectionAction[Write] = {
    eventMessage.event match {
      case ReservationCreated(id, clientId) =>
        val newView = ReservationView(id.value, clientId, Opened, new Date(now().getMillis))
        dao.createOrUpdate(newView)

      case ReservationConfirmed(id, clientId, _) =>
        dao.updateStatus(id.value, Confirmed)

      case ReservationCanceled(id) =>
        dao.updateStatus(id.value, Canceled)

      case ReservationClosed(id) =>
        dao.updateStatus(id.value, Closed)

      case ProductReserved(id, product, quantity) =>
        // TODO handle
        DBIOAction.successful(())
    }
  }
}
Example 53
Source File: ReservationProjectionSpec.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.view

import java.sql.Date

import com.typesafe.config.{Config, ConfigFactory}
import ecommerce.sales._
import ecommerce.sales.ReservationStatus.{Confirmed, Opened}
import org.joda.time.DateTime.now
import org.scalatest._
import pl.newicom.dddd.messaging.event.OfficeEventMessage
import pl.newicom.dddd.office.CaseRef

import scala.concurrent.ExecutionContext.Implicits.global

class ReservationProjectionSpec extends WordSpecLike with Matchers with ViewTestSupport {

  override def config: Config = ConfigFactory.load()

  val dao = new ReservationDao
  val projection = new ReservationProjection(dao)

  "ReservationProjection" should {
    "consume ReservationCreated event" in {
      // When
      projection.consume(ReservationCreated(new ReservationId("reservation-1"), "client-1")).run()

      // Then
      val reservation = dao.byId("reservation-1").result
      assert(reservation.map(_.status) == Some(Opened))
    }
  }

  "ReservationProjection" should {
    "consume ReservationConfirmed event" in {
      // Given
      dao.createOrUpdate(ReservationView("reservation-1", "client-1", Opened, new Date(now.getMillis))).run()

      // When
      projection.consume(ReservationConfirmed(new ReservationId("reservation-1"), "client-1", Money(10))).run()

      // Then
      val reservation = dao.byId("reservation-1").result
      assert(reservation.map(_.status) == Some(Confirmed))
    }
  }

  override def ensureSchemaDropped = dao.ensureSchemaDropped
  override def ensureSchemaCreated = dao.ensureSchemaCreated

  implicit def toEventMessage(event: ReservationCreated): OfficeEventMessage =
    OfficeEventMessage(CaseRef(event.reservationId.value, ReservationOfficeId, None), event)

  implicit def toEventMessage(event: ReservationConfirmed): OfficeEventMessage =
    OfficeEventMessage(CaseRef(event.reservationId.value, ReservationOfficeId, None), event)
}
Example 54
Source File: ReservationViewEndpointSpec.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.app

import java.sql.Date

import akka.http.scaladsl.model.StatusCodes.NotFound
import akka.http.scaladsl.server._
import akka.http.scaladsl.testkit.ScalatestRouteTest
import com.typesafe.config.ConfigFactory
import ecommerce.sales.view.{ReservationDao, ReservationView, ViewTestSupport}
import ecommerce.sales.{ReservationStatus, SalesSerializationHintsProvider}
import org.joda.time.DateTime._
import org.json4s.Formats
import org.scalatest.{BeforeAndAfter, Matchers, WordSpecLike}
import pl.newicom.dddd.serialization.JsonSerHints._
import pl.newicom.dddd.utils.UUIDSupport.uuid7

class ReservationViewEndpointSpec extends WordSpecLike with Matchers with ScalatestRouteTest
  with ViewTestSupport with BeforeAndAfter {

  override lazy val config = ConfigFactory.load

  implicit val formats: Formats = new SalesSerializationHintsProvider().hints()

  lazy val dao = new ReservationDao
  val reservationId = uuid7

  before {
    viewStore.run {
      dao.createOrUpdate(ReservationView(reservationId, "client-1", ReservationStatus.Opened, new Date(now.getMillis)))
    }.futureValue
  }

  after {
    viewStore.run {
      dao.remove(reservationId)
    }.futureValue
  }

  "Reservation view endpoint" should {
    def response = responseAs[String]

    val route: Route = new ReservationViewEndpoint().route(viewStore)

    "respond to /reservation/all with all reservations" in {
      Get("/reservation/all") ~> route ~> check {
        response should include (reservationId)
      }
    }

    "respond to /reservation/{reservationId} with requested reservation" in {
      Get(s"/reservation/$reservationId") ~> route ~> check {
        response should include (reservationId)
      }
    }

    "respond to /reservation/{reservationId} with NotFound if reservation unknown" in {
      Get(s"/reservation/invalid") ~> route ~> check {
        status shouldBe NotFound
      }
    }
  }

  def ensureSchemaDropped = dao.ensureSchemaDropped
  def ensureSchemaCreated = dao.ensureSchemaCreated
}
Example 55
Source File: LoadDataBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark

import java.sql.{Connection, Date, DriverManager}
import java.time.{Instant, LocalDate}
import java.util.Properties

import org.apache.spark.sql.types._
import com.github.mrpowers.spark.daria.sql.SparkSessionExt._
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.util.Random

// LoadDataBenchmark is written to test load data with CPU profiler
// this feature is accessible in Ultimate version of IntelliJ IDEA
// see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details
object LoadDataBenchmark extends App {

  final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost")
  final val masterPort: String = sys.props.getOrElse("memsql.port", "5506")

  val spark: SparkSession = SparkSession
    .builder()
    .master("local")
    .config("spark.sql.shuffle.partitions", "1")
    .config("spark.driver.bindAddress", "localhost")
    .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}")
    .config("spark.datasource.memsql.database", "testdb")
    .getOrCreate()

  def jdbcConnection: Loan[Connection] = {
    val connProperties = new Properties()
    connProperties.put("user", "root")

    Loan(
      DriverManager.getConnection(
        s"jdbc:mysql://$masterHost:$masterPort",
        connProperties
      ))
  }

  def executeQuery(sql: String): Unit = {
    jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql)))
  }

  executeQuery("set global default_partitions_per_leaf = 2")
  executeQuery("drop database if exists testdb")
  executeQuery("create database testdb")

  def genRow(): (Long, Int, Double, String) =
    (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20))

  val df = spark.createDF(
    List.fill(1000000)(genRow()),
    List(("LongType", LongType, true),
      ("IntType", IntegerType, true),
      ("DoubleType", DoubleType, true),
      ("StringType", StringType, true))
  )

  val start = System.nanoTime()
  df.write
    .format("memsql")
    .mode(SaveMode.Append)
    .save("testdb.batchinsert")

  val diff = System.nanoTime() - start
  println("Elapsed time: " + diff + "ns [CSV serialization] ")

  executeQuery("truncate testdb.batchinsert")

  val avroStart = System.nanoTime()
  df.write
    .format(DefaultSource.MEMSQL_SOURCE_NAME_SHORT)
    .mode(SaveMode.Append)
    .option(MemsqlOptions.LOAD_DATA_FORMAT, "Avro")
    .save("testdb.batchinsert")
  val avroDiff = System.nanoTime() - avroStart
  println("Elapsed time: " + avroDiff + "ns [Avro serialization] ")
}
Example 56
Source File: BatchInsertBenchmark.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark

import java.sql.{Connection, Date, DriverManager}
import java.time.LocalDate
import java.util.Properties

import org.apache.spark.sql.types._
import com.github.mrpowers.spark.daria.sql.SparkSessionExt._
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.util.Random

// BatchInsertBenchmark is written to test batch insert with CPU profiler
// this feature is accessible in Ultimate version of IntelliJ IDEA
// see https://www.jetbrains.com/help/idea/async-profiler.html#profile for more details
object BatchInsertBenchmark extends App {

  final val masterHost: String = sys.props.getOrElse("memsql.host", "localhost")
  final val masterPort: String = sys.props.getOrElse("memsql.port", "5506")

  val spark: SparkSession = SparkSession
    .builder()
    .master("local")
    .config("spark.sql.shuffle.partitions", "1")
    .config("spark.driver.bindAddress", "localhost")
    .config("spark.datasource.memsql.ddlEndpoint", s"${masterHost}:${masterPort}")
    .config("spark.datasource.memsql.database", "testdb")
    .getOrCreate()

  def jdbcConnection: Loan[Connection] = {
    val connProperties = new Properties()
    connProperties.put("user", "root")

    Loan(
      DriverManager.getConnection(
        s"jdbc:mysql://$masterHost:$masterPort",
        connProperties
      ))
  }

  def executeQuery(sql: String): Unit = {
    jdbcConnection.to(conn => Loan(conn.createStatement).to(_.execute(sql)))
  }

  executeQuery("set global default_partitions_per_leaf = 2")
  executeQuery("drop database if exists testdb")
  executeQuery("create database testdb")

  def genDate() =
    Date.valueOf(LocalDate.ofEpochDay(LocalDate.of(2001, 4, 11).toEpochDay + Random.nextInt(10000)))
  def genRow(): (Long, Int, Double, String, Date) =
    (Random.nextLong(), Random.nextInt(), Random.nextDouble(), Random.nextString(20), genDate())

  val df = spark.createDF(
    List.fill(1000000)(genRow()),
    List(("LongType", LongType, true),
      ("IntType", IntegerType, true),
      ("DoubleType", DoubleType, true),
      ("StringType", StringType, true),
      ("DateType", DateType, true))
  )

  val start = System.nanoTime()
  df.write
    .format("memsql")
    .option("tableKey.primary", "IntType")
    .option("onDuplicateKeySQL", "IntType = IntType")
    .mode(SaveMode.Append)
    .save("testdb.batchinsert")

  val diff = System.nanoTime() - start
  println("Elapsed time: " + diff + "ns")
}
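The genDate helper draws a uniform random date in a 10000-day window by jittering the epoch-day count. A standalone check of the range (my arithmetic, plain JDK):

import java.sql.Date
import java.time.LocalDate
import scala.util.Random

val base = LocalDate.of(2001, 4, 11).toEpochDay
val d = Date.valueOf(LocalDate.ofEpochDay(base + Random.nextInt(10000)))
// d ranges over [2001-04-11, 2028-08-26], since nextInt(10000) yields 0 to 9999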