src/Pure/ML/ml_lex.ML
changeset 56278 2576d3a40ed6
parent 55828 42ac3cfb89f6
child 56459 38d0b2099743
equal deleted inserted replaced
56277:c4f75e733812 56278:2576d3a40ed6
    24   val source: (Symbol.symbol, 'a) Source.source ->
    24   val source: (Symbol.symbol, 'a) Source.source ->
    25     (token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source)
    25     (token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source)
    26       Source.source) Source.source
    26       Source.source) Source.source
    27   val tokenize: string -> token list
    27   val tokenize: string -> token list
    28   val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list
    28   val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list
    29   val read_source: Symbol_Pos.source -> token Antiquote.antiquote list
    29   val read_source: bool -> Symbol_Pos.source -> token Antiquote.antiquote list
    30 end;
    30 end;
    31 
    31 
    32 structure ML_Lex: ML_LEX =
    32 structure ML_Lex: ML_LEX =
    33 struct
    33 struct
    34 
    34 
   130 
   130 
   131 (* markup *)
   131 (* markup *)
   132 
   132 
   133 local
   133 local
   134 
   134 
   135 val token_kind_markup =
   135 fun token_kind_markup SML =
   136  fn Keyword   => (Markup.empty, "")
   136  fn Keyword   => (Markup.empty, "")
   137   | Ident     => (Markup.empty, "")
   137   | Ident     => (Markup.empty, "")
   138   | LongIdent => (Markup.empty, "")
   138   | LongIdent => (Markup.empty, "")
   139   | TypeVar   => (Markup.ML_tvar, "")
   139   | TypeVar   => (Markup.ML_tvar, "")
   140   | Word      => (Markup.ML_numeral, "")
   140   | Word      => (Markup.ML_numeral, "")
   141   | Int       => (Markup.ML_numeral, "")
   141   | Int       => (Markup.ML_numeral, "")
   142   | Real      => (Markup.ML_numeral, "")
   142   | Real      => (Markup.ML_numeral, "")
   143   | Char      => (Markup.ML_char, "")
   143   | Char      => (Markup.ML_char, "")
   144   | String    => (Markup.ML_string, "")
   144   | String    => (if SML then Markup.SML_string else Markup.ML_string, "")
   145   | Space     => (Markup.empty, "")
   145   | Space     => (Markup.empty, "")
   146   | Comment   => (Markup.ML_comment, "")
   146   | Comment   => (if SML then Markup.SML_comment else Markup.ML_comment, "")
   147   | Error msg => (Markup.bad, msg)
   147   | Error msg => (Markup.bad, msg)
   148   | EOF       => (Markup.empty, "");
   148   | EOF       => (Markup.empty, "");
   149 
   149 
   150 in
   150 in
   151 
   151 
   152 fun report_of_token (tok as Token ((pos, _), (kind, x))) =
   152 fun report_of_token SML (tok as Token ((pos, _), (kind, x))) =
   153   let
   153   let
   154     val (markup, txt) =
   154     val (markup, txt) =
   155       if not (is_keyword tok) then token_kind_markup kind
   155       if not (is_keyword tok) then token_kind_markup SML kind
   156       else if is_delimiter tok then (Markup.ML_delimiter, "")
   156       else if is_delimiter tok then (Markup.ML_delimiter, "")
   157       else if member (op =) keywords2 x then (Markup.ML_keyword2, "")
   157       else if member (op =) keywords2 x then (Markup.ML_keyword2, "")
   158       else if member (op =) keywords3 x then (Markup.ML_keyword3, "")
   158       else if member (op =) keywords3 x then (Markup.ML_keyword3, "")
   159       else (Markup.ML_keyword1, "");
   159       else (Markup.ML_keyword1, "");
   160   in ((pos, markup), txt) end;
   160   in ((pos, markup), txt) end;
   289   recover_string ||
   289   recover_string ||
   290   Symbol_Pos.recover_comment ||
   290   Symbol_Pos.recover_comment ||
   291   Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
   291   Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
   292   >> (single o token (Error msg));
   292   >> (single o token (Error msg));
   293 
   293 
   294 in
   294 fun gen_read SML pos text =
   295 
       
   296 fun source src =
       
   297   Symbol_Pos.source (Position.line 1) src
       
   298   |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover));
       
   299 
       
   300 val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
       
   301 
       
   302 fun read pos text =
       
   303   let
   295   let
   304     val syms = Symbol_Pos.explode (text, pos);
   296     val syms = Symbol_Pos.explode (text, pos);
   305     val termination =
   297     val termination =
   306       if null syms then []
   298       if null syms then []
   307       else
   299       else
   308         let
   300         let
   309           val pos1 = List.last syms |-> Position.advance;
   301           val pos1 = List.last syms |-> Position.advance;
   310           val pos2 = Position.advance Symbol.space pos1;
   302           val pos2 = Position.advance Symbol.space pos1;
   311         in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end;
   303         in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end;
       
   304 
       
   305     val scan = if SML then scan_ml >> Antiquote.Text else scan_antiq;
       
   306     fun check (Antiquote.Text tok) = (check_content_of tok; if SML then () else warn tok)
       
   307       | check _ = ();
   312     val input =
   308     val input =
   313       (Source.of_list syms
   309       Source.of_list syms
   314         |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_antiq))
   310       |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan))
   315           (SOME (false, fn msg => recover msg >> map Antiquote.Text))
   311         (SOME (false, fn msg => recover msg >> map Antiquote.Text))
   316         |> Source.exhaust
   312       |> Source.exhaust
   317         |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token))
   313       |> tap (Position.reports_text o Antiquote.antiquote_reports (single o report_of_token SML))
   318         |> tap (List.app (fn Antiquote.Text tok => (check_content_of tok; warn tok) | _ => ())));
   314       |> tap (List.app check);
   319   in input @ termination end;
   315   in input @ termination end;
   320 
   316 
   321 fun read_source {delimited, text, pos} =
   317 in
   322   (Position.report pos (Markup.language_ML delimited); read pos text);
   318 
   323 
   319 fun source src =
   324 end;
   320   Symbol_Pos.source (Position.line 1) src
   325 
   321   |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover));
   326 end;
   322 
   327 
   323 val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
       
   324 
       
   325 val read = gen_read false;
       
   326 
       
   327 fun read_source SML {delimited, text, pos} =
       
   328   (Position.report pos ((if SML then Markup.language_SML else Markup.language_ML) delimited);
       
   329    gen_read SML pos text);
       
   330 
       
   331 end;
       
   332 
       
   333 end;
       
   334