src/Pure/Thy/thy_syntax.ML
author wenzelm
Tue Feb 11 17:44:29 2014 +0100 (2014-02-11)
changeset 55431 e0f20a44ff9d
parent 55118 7df949045dc5
child 55708 f4b114070675
permissions -rw-r--r--
common Command.Chunk for command source and auxiliary files (static Symbol.Index without actual String content);
more informative type Blob, to allow markup reports;
wenzelm@29315
     1
(*  Title:      Pure/Thy/thy_syntax.ML
wenzelm@23726
     2
    Author:     Makarius
wenzelm@23726
     3
wenzelm@29315
     4
Superficial theory syntax: tokens and spans.
wenzelm@23726
     5
*)
wenzelm@23726
     6
wenzelm@29315
     7
signature THY_SYNTAX =
wenzelm@23726
     8
sig
wenzelm@36959
     9
  val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list
wenzelm@48768
    10
  val reports_of_tokens: Token.T list -> bool * (Position.report * string) list
wenzelm@40131
    11
  val present_token: Token.T -> Output.output
wenzelm@48878
    12
  datatype span_kind = Command of string * Position.T | Ignored | Malformed
wenzelm@48867
    13
  datatype span = Span of span_kind * Token.T list
wenzelm@27842
    14
  val span_kind: span -> span_kind
wenzelm@36959
    15
  val span_content: span -> Token.T list
wenzelm@48865
    16
  val present_span: span -> Output.output
wenzelm@46811
    17
  val parse_spans: Token.T list -> span list
wenzelm@54520
    18
  val resolve_files: (string -> Path.T * Position.T -> Token.file Exn.result list) -> span -> span
wenzelm@51225
    19
  datatype 'a element = Element of 'a * ('a element list * 'a) option
wenzelm@51321
    20
  val atom: 'a -> 'a element
wenzelm@51225
    21
  val map_element: ('a -> 'b) -> 'a element -> 'b element
wenzelm@51225
    22
  val flat_element: 'a element -> 'a list
wenzelm@51268
    23
  val last_element: 'a element -> 'a
wenzelm@51225
    24
  val parse_elements: span list -> span element list
wenzelm@23726
    25
end;
wenzelm@23726
    26
wenzelm@37216
    27
structure Thy_Syntax: THY_SYNTAX =
wenzelm@23726
    28
struct
wenzelm@23726
    29
wenzelm@23803
    30
(** tokens **)
wenzelm@23803
    31
wenzelm@23803
    32
(* parse *)
wenzelm@23726
    33
wenzelm@40523
    34
fun parse_tokens lexs pos =
wenzelm@46811
    35
  Source.of_string #>
wenzelm@46811
    36
  Symbol.source #>
wenzelm@46811
    37
  Token.source {do_recover = SOME false} (K lexs) pos #>
wenzelm@46811
    38
  Source.exhaust;
wenzelm@23726
    39
wenzelm@23726
    40
wenzelm@23803
    41
(* present *)
wenzelm@23726
    42
wenzelm@23726
    43
local
wenzelm@23726
    44
wenzelm@23726
    45
val token_kind_markup =
wenzelm@50201
    46
 fn Token.Command       => (Markup.command, "")
wenzelm@50201
    47
  | Token.Keyword       => (Markup.keyword, "")
wenzelm@48768
    48
  | Token.Ident         => (Markup.empty, "")
wenzelm@48768
    49
  | Token.LongIdent     => (Markup.empty, "")
wenzelm@48768
    50
  | Token.SymIdent      => (Markup.empty, "")
wenzelm@50201
    51
  | Token.Var           => (Markup.var, "")
wenzelm@50201
    52
  | Token.TypeIdent     => (Markup.tfree, "")
wenzelm@50201
    53
  | Token.TypeVar       => (Markup.tvar, "")
wenzelm@48768
    54
  | Token.Nat           => (Markup.empty, "")
wenzelm@48768
    55
  | Token.Float         => (Markup.empty, "")
wenzelm@50201
    56
  | Token.String        => (Markup.string, "")
wenzelm@50201
    57
  | Token.AltString     => (Markup.altstring, "")
wenzelm@50201
    58
  | Token.Verbatim      => (Markup.verbatim, "")
wenzelm@55033
    59
  | Token.Cartouche     => (Markup.cartouche, "")
wenzelm@48768
    60
  | Token.Space         => (Markup.empty, "")
wenzelm@50201
    61
  | Token.Comment       => (Markup.comment, "")
wenzelm@48768
    62
  | Token.InternalValue => (Markup.empty, "")
wenzelm@50201
    63
  | Token.Error msg     => (Markup.bad, msg)
wenzelm@50201
    64
  | Token.Sync          => (Markup.control, "")
wenzelm@50201
    65
  | Token.EOF           => (Markup.control, "");
wenzelm@23726
    66
wenzelm@37192
    67
fun token_markup tok =
wenzelm@50238
    68
  if Token.keyword_with (not o Symbol.is_ascii_identifier) tok
wenzelm@50201
    69
  then (Markup.operator, "")
wenzelm@37197
    70
  else
wenzelm@37197
    71
    let
wenzelm@37197
    72
      val kind = Token.kind_of tok;
wenzelm@37197
    73
      val props =
wenzelm@38471
    74
        if kind = Token.Command
wenzelm@38471
    75
        then Markup.properties [(Markup.nameN, Token.content_of tok)]
wenzelm@37197
    76
        else I;
wenzelm@48768
    77
      val (markup, txt) = token_kind_markup kind;
wenzelm@48768
    78
    in (props markup, txt) end;
wenzelm@37192
    79
wenzelm@48749
    80
fun reports_of_token tok =
wenzelm@48749
    81
  let
wenzelm@48749
    82
    val malformed_symbols =
wenzelm@48749
    83
      Symbol_Pos.explode (Token.source_position_of tok)
wenzelm@48749
    84
      |> map_filter (fn (sym, pos) =>
wenzelm@48756
    85
          if Symbol.is_malformed sym
wenzelm@50201
    86
          then SOME ((pos, Markup.bad), "Malformed symbolic character") else NONE);
wenzelm@48749
    87
    val is_malformed = Token.is_error tok orelse not (null malformed_symbols);
wenzelm@48768
    88
    val (markup, txt) = token_markup tok;
wenzelm@48768
    89
    val reports = ((Token.position_of tok, markup), txt) :: malformed_symbols;
wenzelm@48749
    90
  in (is_malformed, reports) end;
wenzelm@40528
    91
wenzelm@23803
    92
in
wenzelm@23803
    93
wenzelm@48749
    94
fun reports_of_tokens toks =
wenzelm@48749
    95
  let val results = map reports_of_token toks
wenzelm@48749
    96
  in (exists fst results, maps snd results) end;
wenzelm@48749
    97
wenzelm@23726
    98
fun present_token tok =
wenzelm@48768
    99
  Markup.enclose (fst (token_markup tok)) (Output.output (Token.unparse tok));
wenzelm@23726
   100
wenzelm@23803
   101
end;
wenzelm@23803
   102
wenzelm@23803
   103
wenzelm@23803
   104
wenzelm@27665
   105
(** spans **)
wenzelm@27665
   106
wenzelm@27842
   107
(* type span *)
wenzelm@27842
   108
wenzelm@48878
   109
datatype span_kind = Command of string * Position.T | Ignored | Malformed;
wenzelm@36959
   110
datatype span = Span of span_kind * Token.T list;
wenzelm@23803
   111
wenzelm@27842
   112
fun span_kind (Span (k, _)) = k;
wenzelm@27842
   113
fun span_content (Span (_, toks)) = toks;
wenzelm@27842
   114
wenzelm@48865
   115
val present_span = implode o map present_token o span_content;
wenzelm@48865
   116
wenzelm@23803
   117
wenzelm@23803
   118
(* parse *)
wenzelm@23726
   119
wenzelm@23803
   120
local
wenzelm@23803
   121
wenzelm@46811
   122
fun make_span toks =
wenzelm@46811
   123
  if not (null toks) andalso Token.is_command (hd toks) then
wenzelm@48878
   124
    Span (Command (Token.content_of (hd toks), Token.position_of (hd toks)), toks)
wenzelm@51267
   125
  else if forall Token.is_improper toks then Span (Ignored, toks)
wenzelm@46811
   126
  else Span (Malformed, toks);
wenzelm@23726
   127
wenzelm@53843
   128
fun flush (result, span, improper) =
wenzelm@53843
   129
  result
wenzelm@53843
   130
  |> not (null span) ? cons (rev span)
wenzelm@53843
   131
  |> not (null improper) ? cons (rev improper);
wenzelm@53843
   132
wenzelm@53843
   133
fun parse tok (result, span, improper) =
wenzelm@53843
   134
  if Token.is_command tok then (flush (result, span, improper), [tok], [])
wenzelm@53843
   135
  else if Token.is_improper tok then (result, span, tok :: improper)
wenzelm@53843
   136
  else (result, tok :: (improper @ span), []);
wenzelm@23726
   137
wenzelm@23726
   138
in
wenzelm@23726
   139
wenzelm@46811
   140
fun parse_spans toks =
wenzelm@53843
   141
  fold parse toks ([], [], [])
wenzelm@53843
   142
  |> flush |> rev |> map make_span;
wenzelm@23803
   143
wenzelm@23803
   144
end;
wenzelm@23803
   145
wenzelm@23803
   146
wenzelm@54451
   147
(* inlined files *)
wenzelm@54451
   148
wenzelm@54451
   149
local
wenzelm@54451
   150
wenzelm@54451
   151
fun clean ((i1, t1) :: (i2, t2) :: toks) =
wenzelm@54451
   152
      if Token.keyword_with (fn s => s = "%" orelse s = "--") t1 then clean toks
wenzelm@54451
   153
      else (i1, t1) :: clean ((i2, t2) :: toks)
wenzelm@54451
   154
  | clean toks = toks;
wenzelm@54451
   155
wenzelm@54451
   156
fun clean_tokens toks =
wenzelm@54451
   157
  ((0 upto length toks - 1) ~~ toks)
wenzelm@54451
   158
  |> filter (fn (_, tok) => Token.is_proper tok)
wenzelm@54451
   159
  |> clean;
wenzelm@54451
   160
wenzelm@55118
   161
fun find_file ((_, tok) :: toks) =
wenzelm@55118
   162
      if Token.is_command tok then
wenzelm@55118
   163
        toks |> get_first (fn (i, tok) =>
wenzelm@55118
   164
          if Token.is_name tok then
wenzelm@55118
   165
            SOME (i, (Path.explode (Token.content_of tok), Token.position_of tok))
wenzelm@55118
   166
              handle ERROR msg => error (msg ^ Token.pos_of tok)
wenzelm@55118
   167
          else NONE)
wenzelm@55118
   168
      else NONE
wenzelm@55118
   169
  | find_file [] = NONE;
wenzelm@54451
   170
wenzelm@54451
   171
in
wenzelm@54451
   172
wenzelm@54451
   173
fun resolve_files read_files span =
wenzelm@54451
   174
  (case span of
wenzelm@54451
   175
    Span (Command (cmd, pos), toks) =>
wenzelm@54451
   176
      if Keyword.is_theory_load cmd then
wenzelm@55118
   177
        (case find_file (clean_tokens toks) of
wenzelm@54451
   178
          NONE => error ("Bad file argument of command " ^ quote cmd ^ Position.here pos)
wenzelm@54451
   179
        | SOME (i, path) =>
wenzelm@54451
   180
            let
wenzelm@54451
   181
              val toks' = toks |> map_index (fn (j, tok) =>
wenzelm@54451
   182
                if i = j then Token.put_files (read_files cmd path) tok
wenzelm@54451
   183
                else tok);
wenzelm@54451
   184
            in Span (Command (cmd, pos), toks') end)
wenzelm@54451
   185
      else span
wenzelm@54451
   186
  | _ => span);
wenzelm@54451
   187
wenzelm@54451
   188
end;
wenzelm@54451
   189
wenzelm@54451
   190
wenzelm@28434
   191
wenzelm@43621
   192
(** specification elements: commands with optional proof **)
wenzelm@43621
   193
wenzelm@51225
   194
datatype 'a element = Element of 'a * ('a element list * 'a) option;
wenzelm@51225
   195
wenzelm@51225
   196
fun element (a, b) = Element (a, SOME b);
wenzelm@51225
   197
fun atom a = Element (a, NONE);
wenzelm@43621
   198
wenzelm@51225
   199
fun map_element f (Element (a, NONE)) = Element (f a, NONE)
wenzelm@51225
   200
  | map_element f (Element (a, SOME (elems, b))) =
wenzelm@51225
   201
      Element (f a, SOME ((map o map_element) f elems, f b));
wenzelm@51225
   202
wenzelm@51225
   203
fun flat_element (Element (a, NONE)) = [a]
wenzelm@51225
   204
  | flat_element (Element (a, SOME (elems, b))) = a :: maps flat_element elems @ [b];
wenzelm@43621
   205
wenzelm@51268
   206
fun last_element (Element (a, NONE)) = a
wenzelm@51268
   207
  | last_element (Element (_, SOME (_, b))) = b;
wenzelm@51268
   208
wenzelm@28434
   209
wenzelm@28434
   210
(* scanning spans *)
wenzelm@28434
   211
wenzelm@48878
   212
val eof = Span (Command ("", Position.none), []);
wenzelm@28434
   213
wenzelm@48878
   214
fun is_eof (Span (Command ("", _), _)) = true
wenzelm@28434
   215
  | is_eof _ = false;
wenzelm@28434
   216
wenzelm@28434
   217
val not_eof = not o is_eof;
wenzelm@28434
   218
wenzelm@28434
   219
val stopper = Scan.stopper (K eof) is_eof;
wenzelm@28434
   220
wenzelm@28434
   221
wenzelm@46811
   222
(* parse *)
wenzelm@28434
   223
wenzelm@28434
   224
local
wenzelm@28434
   225
wenzelm@48878
   226
fun command_with pred =
wenzelm@48878
   227
  Scan.one (fn (Span (Command (name, _), _)) => pred name | _ => false);
wenzelm@28434
   228
wenzelm@51225
   229
val proof_atom =
wenzelm@51225
   230
  Scan.one (fn (Span (Command (name, _), _)) => Keyword.is_proof_body name | _ => true) >> atom;
wenzelm@28434
   231
wenzelm@51225
   232
fun proof_element x = (command_with Keyword.is_proof_goal -- proof_rest >> element || proof_atom) x
wenzelm@51225
   233
and proof_rest x = (Scan.repeat proof_element -- command_with Keyword.is_qed) x;
wenzelm@51225
   234
wenzelm@51225
   235
val other_element =
wenzelm@51225
   236
  command_with Keyword.is_theory_goal -- proof_rest >> element ||
wenzelm@51225
   237
  Scan.one not_eof >> atom;
wenzelm@28434
   238
wenzelm@28434
   239
in
wenzelm@28434
   240
wenzelm@46811
   241
val parse_elements =
wenzelm@46811
   242
  Source.of_list #>
wenzelm@51225
   243
  Source.source stopper (Scan.bulk other_element) NONE #>
wenzelm@46811
   244
  Source.exhaust;
wenzelm@28434
   245
wenzelm@23726
   246
end;
wenzelm@28434
   247
wenzelm@28434
   248
end;