author | wenzelm |
Tue, 10 Dec 2024 16:37:09 +0100 | |
changeset 81569 | f8b28356ab94 |
parent 80192 | 36e6ba1527f0 |
permissions | -rw-r--r-- |
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
1 |
/* Title: Pure/General/url.scala |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
2 |
Author: Makarius |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
3 |
|
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
4 |
Basic URL/URI operations. |
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
5 |
*/ |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
6 |
|
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
7 |
package isabelle |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
8 |
|
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
9 |
|
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
10 |
import java.io.{File => JFile, InputStream} |
65188 | 11 |
import java.nio.file.{Paths, FileSystemNotFoundException} |
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
12 |
import java.net.{URI, URISyntaxException, MalformedURLException, URLDecoder, URLEncoder, |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
13 |
URLConnection} |
69901 | 14 |
import java.util.Locale |
63642 | 15 |
import java.util.zip.GZIPInputStream |
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
16 |
|
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
17 |
|
75393 | 18 |
object Url { |
69901 | 19 |
/* special characters */ |
20 |
||
21 |
def escape_special(c: Char): String = |
|
71163 | 22 |
if ("!#$&'()*+,/:;=?@[]".contains(c)) { |
23 |
String.format(Locale.ROOT, "%%%02X", Integer.valueOf(c.toInt)) |
|
24 |
} |
|
69901 | 25 |
else c.toString |
26 |
||
71601 | 27 |
def escape_special(s: String): String = s.iterator.map(escape_special).mkString |
69901 | 28 |
|
29 |
def escape_name(name: String): String = |
|
30 |
name.iterator.map({ case '\'' => "%27" case c => c.toString }).mkString |
|
31 |
||
32 |
||
33 |
/* make and check URLs */ |
|
62248 | 34 |
|
79045
24d04dd5bf01
more robust exception handling (amending 8cc1ae43e12e);
wenzelm
parents:
79044
diff
changeset
|
35 |
def is_malformed(exn: Throwable): Boolean = |
24d04dd5bf01
more robust exception handling (amending 8cc1ae43e12e);
wenzelm
parents:
79044
diff
changeset
|
36 |
exn.isInstanceOf[MalformedURLException] || |
24d04dd5bf01
more robust exception handling (amending 8cc1ae43e12e);
wenzelm
parents:
79044
diff
changeset
|
37 |
exn.isInstanceOf[URISyntaxException] || |
24d04dd5bf01
more robust exception handling (amending 8cc1ae43e12e);
wenzelm
parents:
79044
diff
changeset
|
38 |
exn.isInstanceOf[IllegalArgumentException] |
24d04dd5bf01
more robust exception handling (amending 8cc1ae43e12e);
wenzelm
parents:
79044
diff
changeset
|
39 |
|
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
40 |
def apply(uri: URI): Url = new Url(uri) |
79044
8cc1ae43e12e
clarified signature: avoid deprecated URL constructors;
wenzelm
parents:
77796
diff
changeset
|
41 |
|
79514
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
42 |
def apply(name: String): Url = |
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
43 |
try { new Url(new URI(name)) } |
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
44 |
catch { |
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
45 |
case exn: Throwable if is_malformed(exn) => error("Malformed URL " + quote(name)) |
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
46 |
} |
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
47 |
|
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
48 |
def is_wellformed(name: String): Boolean = |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
49 |
try { Url(name); true } |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
50 |
catch { case ERROR(_) => false } |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
51 |
|
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
52 |
def is_readable(name: String): Boolean = |
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
53 |
try { Url(name).open_stream().close(); true } |
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
54 |
catch { case ERROR(_) => false } |
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
55 |
|
63642 | 56 |
|
73417 | 57 |
/* file name */ |
58 |
||
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
59 |
def file_name(url: Url): String = |
79511 | 60 |
Library.take_suffix[Char](c => c != '/' && c != '\\', url.java_url.getFile.toList)._2.mkString |
72558 | 61 |
|
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
62 |
def trim_index(url: Url): Url = { |
72558 | 63 |
Library.try_unprefix("/index.html", url.toString) match { |
64 |
case Some(u) => Url(u) |
|
65 |
case None => |
|
66 |
Library.try_unprefix("/index.php", url.toString) match { |
|
67 |
case Some(u) => Url(u) |
|
68 |
case None => url |
|
69 |
} |
|
70 |
} |
|
71 |
} |
|
72 |
||
73 |
||
67245 | 74 |
/* strings */ |
75 |
||
76354 | 76 |
def decode(s: String): String = URLDecoder.decode(s, UTF8.charset) |
77 |
def encode(s: String): String = URLEncoder.encode(s, UTF8.charset) |
|
67245 | 78 |
|
79 |
||
63642 | 80 |
/* read */ |
81 |
||
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
82 |
private def read(url: Url, gzip: Boolean): String = |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
83 |
using(url.open_stream())(stream => |
65069 | 84 |
File.read_stream(if (gzip) new GZIPInputStream(stream) else stream)) |
63642 | 85 |
|
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
86 |
def read(url: Url): String = read(url, false) |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
87 |
def read_gzip(url: Url): String = read(url, true) |
63645 | 88 |
|
89 |
def read(name: String): String = read(Url(name), false) |
|
90 |
def read_gzip(name: String): String = read(Url(name), true) |
|
64729 | 91 |
|
92 |
||
93 |
/* file URIs */ |
|
94 |
||
66234 | 95 |
def print_file(file: JFile): String = File.absolute(file).toPath.toUri.toString |
64777
ca09695eb43c
clarified Document.Node.Name (again): canonical platform file;
wenzelm
parents:
64775
diff
changeset
|
96 |
def print_file_name(name: String): String = print_file(new JFile(name)) |
64775 | 97 |
|
98 |
def parse_file(uri: String): JFile = Paths.get(new URI(uri)).toFile |
|
64729 | 99 |
|
100 |
def is_wellformed_file(uri: String): Boolean = |
|
64775 | 101 |
try { parse_file(uri); true } |
65188 | 102 |
catch { |
103 |
case _: URISyntaxException | _: IllegalArgumentException | _: FileSystemNotFoundException => |
|
104 |
false |
|
105 |
} |
|
64730 | 106 |
|
66235
d4fa51e7c4ff
retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents:
66234
diff
changeset
|
107 |
def absolute_file(uri: String): JFile = File.absolute(parse_file(uri)) |
d4fa51e7c4ff
retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents:
66234
diff
changeset
|
108 |
def absolute_file_name(uri: String): String = absolute_file(uri).getPath |
d4fa51e7c4ff
retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents:
66234
diff
changeset
|
109 |
|
66234 | 110 |
def canonical_file(uri: String): JFile = File.canonical(parse_file(uri)) |
64777
ca09695eb43c
clarified Document.Node.Name (again): canonical platform file;
wenzelm
parents:
64775
diff
changeset
|
111 |
def canonical_file_name(uri: String): String = canonical_file(uri).getPath |
76617 | 112 |
|
113 |
||
77796 | 114 |
/* generic path notation: standard, platform, ssh, rsync, ftp, http, https */ |
76617 | 115 |
|
76828 | 116 |
private val separators1 = "/\\" |
117 |
private val separators2 = ":/\\" |
|
118 |
||
119 |
def is_base_name(s: String, suffix: String = ""): Boolean = |
|
120 |
s.nonEmpty && !s.exists(separators2.contains) && s.endsWith(suffix) |
|
121 |
||
122 |
def get_base_name(s: String, suffix: String = ""): Option[String] = { |
|
123 |
val i = s.lastIndexWhere(separators2.contains) |
|
124 |
if (i + 1 >= s.length) None else Library.try_unsuffix(suffix, s.substring(i + 1)) |
|
125 |
} |
|
126 |
||
127 |
def strip_base_name(s: String, suffix: String = ""): Option[String] = { |
|
128 |
val i = s.lastIndexWhere(separators2.contains) |
|
129 |
val j = s.lastIndexWhere(c => !separators1.contains(c), end = i) |
|
130 |
if (i + 1 >= s.length || !s.endsWith(suffix)) None |
|
131 |
else if (j < 0) Some(s.substring(0, i + 1)) |
|
132 |
else Some(s.substring(0, j + 1)) |
|
133 |
} |
|
134 |
||
76617 | 135 |
def append_path(prefix: String, suffix: String): String = |
76827 | 136 |
if (prefix.endsWith(":") || prefix.endsWith("/") || prefix.endsWith("\\") || prefix.isEmpty) { |
137 |
prefix + suffix |
|
138 |
} |
|
139 |
else if (prefix.endsWith(":.") || prefix.endsWith("/.") || prefix.endsWith("\\.") || prefix == ".") { |
|
76618 | 140 |
prefix.substring(0, prefix.length - 1) + suffix |
141 |
} |
|
76827 | 142 |
else if (prefix.contains('\\') || suffix.contains('\\')) { |
143 |
prefix + "\\" + suffix |
|
144 |
} |
|
76617 | 145 |
else prefix + "/" + suffix |
76622 | 146 |
|
147 |
def direct_path(prefix: String): String = append_path(prefix, ".") |
|
80192 | 148 |
|
149 |
def dir_path(prefix: String, direct: Boolean = false): String = |
|
150 |
if (direct) direct_path(prefix) else prefix |
|
56501
5fda9e5c5874
basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff
changeset
|
151 |
} |
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
152 |
|
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
153 |
final class Url private(val uri: URI) { |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
154 |
override def toString: String = uri.toString |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
155 |
|
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
156 |
override def hashCode: Int = uri.hashCode |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
157 |
override def equals(obj: Any): Boolean = |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
158 |
obj match { |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
159 |
case other: Url => uri == other.uri |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
160 |
case _ => false |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
161 |
} |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
162 |
|
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
163 |
def resolve(route: String): Url = |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
164 |
if (route.isEmpty) this else new Url(uri.resolve(route)) |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
165 |
|
79514
9204c034a5bf
recover Url.is_wellformed from before d8330439823a, e.g. relevant for JEdit_Resources.read_file_content (the URI alone does not necessarily have a protocol prefix, so plain file-path would be treated as URL);
wenzelm
parents:
79511
diff
changeset
|
166 |
val java_url: java.net.URL = uri.toURL |
79510
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
167 |
def open_stream(): InputStream = java_url.openStream() |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
168 |
def open_connection(): URLConnection = java_url.openConnection() |
d8330439823a
clarified signature: explicit type isabelle.Url to avoid oddities of java.net.URL (e.g. its "equals" method);
wenzelm
parents:
79045
diff
changeset
|
169 |
} |