35 |
35 |
36 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """) |
36 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """) |
37 |
37 |
38 private val symbol = new Regex("""(?xs) |
38 private val symbol = new Regex("""(?xs) |
39 \\ < (?: |
39 \\ < (?: |
40 \^? [A-Za-z][A-Za-z0-9_']* | |
40 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | |
41 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""") |
41 \^? ([A-Za-z][A-Za-z0-9_']*)? ) >?""") |
42 |
42 |
43 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" + |
43 val regex_total = new Regex(plain + "|" + physical_newline + "|" + symbol + "| .") |
44 """ [\ud800-\udbff\ufffd] | \\<\^? """) |
|
45 |
|
46 val regex_total = |
|
47 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .") |
|
48 |
44 |
49 |
45 |
50 /* basic matching */ |
46 /* basic matching */ |
51 |
47 |
52 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff') |
48 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) |
|
49 |
|
50 def is_malformed(s: Symbol): Boolean = |
|
51 s.length match { |
|
52 case 1 => |
|
53 val c = s(0) |
|
54 Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' |
|
55 case 2 => |
|
56 val c1 = s(0) |
|
57 val c2 = s(1) |
|
58 !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) |
|
59 case _ => !s.endsWith(">") |
|
60 } |
53 |
61 |
54 def is_physical_newline(s: Symbol): Boolean = |
62 def is_physical_newline(s: Symbol): Boolean = |
55 s == "\n" || s == "\r" || s == "\r\n" |
63 s == "\n" || s == "\r" || s == "\r\n" |
56 |
|
57 def is_malformed(s: Symbol): Boolean = |
|
58 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches |
|
59 |
64 |
60 class Matcher(text: CharSequence) |
65 class Matcher(text: CharSequence) |
61 { |
66 { |
62 private val matcher = regex_total.pattern.matcher(text) |
67 private val matcher = regex_total.pattern.matcher(text) |
63 def apply(start: Int, end: Int): Int = |
68 def apply(start: Int, end: Int): Int = |