src/Pure/General/url.ML
author wenzelm
Fri, 16 Jun 2023 14:01:30 +0200
changeset 78169 5ad1ae8626de
parent 72680 b22f1e2b4e94
permissions -rw-r--r--
minor performance tuning: avoid external process;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     1
(*  Title:      Pure/General/url.ML
72680
b22f1e2b4e94 removed unused material;
wenzelm
parents: 72511
diff changeset
     2
    Author:     Makarius
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     3
72680
b22f1e2b4e94 removed unused material;
wenzelm
parents: 72511
diff changeset
     4
Basic URLs (see RFC 1738 and RFC 2396).
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     5
*)
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     6
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     7
signature URL =
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
     8
sig
14909
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
     9
  datatype T =
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    10
    File of Path.T |
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    11
    Http of string * Path.T |
68226
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    12
    Https of string * Path.T |
14909
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    13
    Ftp of string * Path.T
21858
05f57309170c avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
wenzelm
parents: 21515
diff changeset
    14
  val explode: string -> T
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    15
end;
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    16
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    17
structure Url: URL =
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    18
struct
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    19
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    20
(* type url *)
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    21
14909
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    22
datatype T =
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    23
  File of Path.T |
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    24
  Http of string * Path.T |
68226
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    25
  Https of string * Path.T |
14909
988b4342ed1f tuned representation; added RemoteFile;
wenzelm
parents: 8806
diff changeset
    26
  Ftp of string * Path.T;
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    27
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    28
21858
05f57309170c avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
wenzelm
parents: 21515
diff changeset
    29
(* explode *)
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    30
14918
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    31
local
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    32
16195
wenzelm
parents: 15627
diff changeset
    33
val scan_host =
58854
b979c781c2db discontinued obsolete \<^sync> marker;
wenzelm
parents: 54702
diff changeset
    34
  (Scan.many1 (fn s => s <> "/" andalso Symbol.not_eof s) >> implode) --|
14918
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    35
  Scan.ahead ($$ "/" || Scan.one Symbol.is_eof);
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    36
58854
b979c781c2db discontinued obsolete \<^sync> marker;
wenzelm
parents: 54702
diff changeset
    37
val scan_path = Scan.many Symbol.not_eof >> (Path.explode o implode);
b979c781c2db discontinued obsolete \<^sync> marker;
wenzelm
parents: 54702
diff changeset
    38
val scan_path_root = Scan.many Symbol.not_eof >> (Path.explode o implode o cons "/");
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    39
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    40
val scan_url =
68226
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    41
  Scan.unless
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    42
    (Scan.this_string "file:" || Scan.this_string "http:" ||
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    43
      Scan.this_string "https:" || Scan.this_string "ftp:") scan_path >> File ||
15175
b62f7b493360 Fix file:/// and file://localhost/ to give absolute paths
aspinall
parents: 15174
diff changeset
    44
  Scan.this_string "file:///" |-- scan_path_root >> File ||
b62f7b493360 Fix file:/// and file://localhost/ to give absolute paths
aspinall
parents: 15174
diff changeset
    45
  Scan.this_string "file://localhost/" |-- scan_path_root >> File ||
64776
3f20c63f71be Windows UNC path is plain file;
wenzelm
parents: 61877
diff changeset
    46
  Scan.this_string "file://" |-- scan_host -- scan_path
72511
460d743010bc clarified signature: overloaded "+" for Path.append;
wenzelm
parents: 68226
diff changeset
    47
    >> (fn (h, p) => File (Path.named_root h + p)) ||
21503
c4ea7e8c3937 Accept URLs of form file:/home... also.
aspinall
parents: 19305
diff changeset
    48
  Scan.this_string "file:/" |-- scan_path_root >> File ||
16195
wenzelm
parents: 15627
diff changeset
    49
  Scan.this_string "http://" |-- scan_host -- scan_path >> Http ||
68226
5ce62512de35 support HTTPS;
wenzelm
parents: 64776
diff changeset
    50
  Scan.this_string "https://" |-- scan_host -- scan_path >> Https ||
16195
wenzelm
parents: 15627
diff changeset
    51
  Scan.this_string "ftp://" |-- scan_host -- scan_path >> Ftp;
14918
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    52
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    53
in
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    54
72680
b22f1e2b4e94 removed unused material;
wenzelm
parents: 72511
diff changeset
    55
fun explode s = Symbol.scanner "Malformed URL" scan_url (Symbol.explode s);
6639
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    56
d399db16964c Basic URLs.
wenzelm
parents:
diff changeset
    57
end;
14918
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    58
9f30a1238090 improved RemoteFile;
wenzelm
parents: 14909
diff changeset
    59
end;