package ws.vinta.albedo.closures

import scala.util.matching.Regex

/**
 * Regex-based helpers for pulling word-like tokens and email domains out of free text.
 *
 * The object is `Serializable`; presumably so it can be captured by closures that are
 * shipped to other processes (TODO confirm against callers).
 */
object StringFunctions extends Serializable {
  /** Character-class body for a token: `\w` characters plus `.`, `-` and `_`. */
  val wordPatternEngOnly: String = """\w\.\-_"""

  /**
   * Same as [[wordPatternEngOnly]], extended with the Hiragana, Katakana, Bopomofo,
   * CJK Compatibility Ideographs and CJK Unified Ideographs Unicode blocks.
   */
  val wordPatternIncludeCJK: String = """\w\.\-_\p{InHiragana}\p{InKatakana}\p{InBopomofo}\p{InCJKCompatibilityIdeographs}\p{InCJKUnifiedIdeographs}"""

  /** Matches one maximal run of [[wordPatternEngOnly]] characters. */
  val reExtractWords: Regex = s"([$wordPatternEngOnly]+)".r

  /** Matches one maximal run of [[wordPatternIncludeCJK]] characters. */
  val reExtractWordsIncludeCJK: Regex = s"([$wordPatternIncludeCJK]+)".r

  /** Matches an `@` followed by a run of [[wordPatternEngOnly]] characters; group 1 is the run. */
  val reExtractEmailDomain: Regex = s"@([$wordPatternEngOnly]+)".r

  /**
   * Splits `text` into tokens made of `\w`, `.`, `-` and `_` characters.
   *
   * @param text the input to tokenize
   * @return every maximal matching run, in order of appearance; empty when nothing matches
   */
  def extractWords(text: String): List[String] =
    reExtractWords.findAllIn(text).toList

  /**
   * Like [[extractWords]], but runs of the CJK/kana/Bopomofo characters listed in
   * [[wordPatternIncludeCJK]] are also kept as (parts of) tokens.
   *
   * @param text the input to tokenize
   * @return every maximal matching run, in order of appearance; empty when nothing matches
   */
  def extractWordsIncludeCJK(text: String): List[String] =
    reExtractWordsIncludeCJK.findAllIn(text).toList

  /**
   * Returns the token that follows the first `@` match in `email`.
   *
   * @param email the string to inspect
   * @return the captured text after the first matching `@`, or `email` itself, unchanged,
   *         when the pattern does not match anywhere
   */
  def extractEmailDomain(email: String): String =
    // Only the first match is needed, so stop there instead of materializing every
    // match and relying on an IndexOutOfBoundsException to signal "no match".
    reExtractEmailDomain
      .findFirstMatchIn(email)
      .map(_.group(1))
      .getOrElse(email)
}