src/Pure/General/symbol.scala
changeset 48773 0e1bab274672
parent 48704 85a3de10567d
child 48774 c4bd5bb3ae69
equal deleted inserted replaced
48772:e46cd0d26481 48773:0e1bab274672
    35 
    35 
    36   private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
    36   private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
    37 
    37 
    38   private val symbol = new Regex("""(?xs)
    38   private val symbol = new Regex("""(?xs)
    39       \\ < (?:
    39       \\ < (?:
    40       \^? [A-Za-z][A-Za-z0-9_']* |
    40       \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* |
    41       \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
    41       \^? ([A-Za-z][A-Za-z0-9_']*)? ) >?""")
    42 
    42 
    43   private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
    43   val regex_total = new Regex(plain + "|" + physical_newline + "|" + symbol + "| .")
    44     """ [\ud800-\udbff\ufffd] | \\<\^? """)
       
    45 
       
    46   val regex_total =
       
    47     new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
       
    48 
    44 
    49 
    45 
    50   /* basic matching */
    46   /* basic matching */
    51 
    47 
    52   def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
    48   def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
       
    49 
       
    50   def is_malformed(s: Symbol): Boolean =
       
    51     s.length match {
       
    52       case 1 =>
       
    53         val c = s(0)
       
    54         Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
       
    55       case 2 =>
       
    56         val c1 = s(0)
       
    57         val c2 = s(1)
       
    58         !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
       
    59       case _ => !s.endsWith(">")
       
    60     }
    53 
    61 
    54   def is_physical_newline(s: Symbol): Boolean =
    62   def is_physical_newline(s: Symbol): Boolean =
    55     s == "\n" || s == "\r" || s == "\r\n"
    63     s == "\n" || s == "\r" || s == "\r\n"
    56 
       
    57   def is_malformed(s: Symbol): Boolean =
       
    58     !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
       
    59 
    64 
    60   class Matcher(text: CharSequence)
    65   class Matcher(text: CharSequence)
    61   {
    66   {
    62     private val matcher = regex_total.pattern.matcher(text)
    67     private val matcher = regex_total.pattern.matcher(text)
    63     def apply(start: Int, end: Int): Int =
    68     def apply(start: Int, end: Int): Int =