src/Pure/General/url.scala
author wenzelm
Wed, 06 Jan 2021 13:47:50 +0100
changeset 73081 120ffea2c244
parent 72558 38ebf696fd0c
child 73367 77ef8bef0593
permissions -rw-r--r--
prefer OpenJDK from Azul: supports more versions and platforms; support arm64-darwin;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
56501
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/url.scala
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     3
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     4
Basic URL operations.
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     5
*/
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     6
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     7
package isabelle
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     8
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
     9
64729
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    10
import java.io.{File => JFile}
65188
50cfc6775361 more complete exception handling;
wenzelm
parents: 65069
diff changeset
    11
import java.nio.file.{Paths, FileSystemNotFoundException}
67245
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    12
import java.net.{URI, URISyntaxException, URL, MalformedURLException, URLDecoder, URLEncoder}
69901
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    13
import java.util.Locale
63642
d83a1eeff9d2 more operations;
wenzelm
parents: 62248
diff changeset
    14
import java.util.zip.GZIPInputStream
56501
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    15
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    16
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    17
object Url
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    18
{
69901
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    19
  /* special characters */
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    20
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    21
  def escape_special(c: Char): String =
71163
b5822f4c3fde tuned -- avoid deprecated constructors;
wenzelm
parents: 69901
diff changeset
    22
    if ("!#$&'()*+,/:;=?@[]".contains(c)) {
b5822f4c3fde tuned -- avoid deprecated constructors;
wenzelm
parents: 69901
diff changeset
    23
      String.format(Locale.ROOT, "%%%02X", Integer.valueOf(c.toInt))
b5822f4c3fde tuned -- avoid deprecated constructors;
wenzelm
parents: 69901
diff changeset
    24
    }
69901
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    25
    else c.toString
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    26
71601
97ccf48c2f0c misc tuning based on hints by IntelliJ IDEA;
wenzelm
parents: 71163
diff changeset
    27
  def escape_special(s: String): String = s.iterator.map(escape_special).mkString
69901
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    28
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    29
  def escape_name(name: String): String =
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    30
    name.iterator.map({ case '\'' => "%27" case c => c.toString }).mkString
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    31
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    32
20b32ade0024 clarified signature;
wenzelm
parents: 69394
diff changeset
    33
  /* make and check URLs */
62248
dca0bac351b2 allow single quote within URL;
wenzelm
parents: 56503
diff changeset
    34
56501
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    35
  def apply(name: String): URL =
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    36
  {
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    37
    try { new URL(name) }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    38
    catch { case _: MalformedURLException => error("Malformed URL " + quote(name)) }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    39
  }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    40
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    41
  def is_wellformed(name: String): Boolean =
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    42
    try { Url(name); true }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    43
    catch { case ERROR(_) => false }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    44
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    45
  def is_readable(name: String): Boolean =
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    46
    try { Url(name).openStream.close; true }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    47
    catch { case ERROR(_) => false }
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
    48
63642
d83a1eeff9d2 more operations;
wenzelm
parents: 62248
diff changeset
    49
72558
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    50
  /* trim index */
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    51
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    52
  def trim_index(url: URL): URL =
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    53
  {
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    54
    Library.try_unprefix("/index.html", url.toString) match {
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    55
      case Some(u) => Url(u)
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    56
      case None =>
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    57
        Library.try_unprefix("/index.php", url.toString) match {
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    58
          case Some(u) => Url(u)
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    59
          case None => url
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    60
        }
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    61
    }
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    62
  }
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    63
38ebf696fd0c support for Mailman list servers;
wenzelm
parents: 71601
diff changeset
    64
67245
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    65
  /* strings */
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    66
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    67
  def decode(s: String): String = URLDecoder.decode(s, UTF8.charset_name)
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    68
  def encode(s: String): String = URLEncoder.encode(s, UTF8.charset_name)
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    69
caa4c9001009 tuned signature;
wenzelm
parents: 66235
diff changeset
    70
63642
d83a1eeff9d2 more operations;
wenzelm
parents: 62248
diff changeset
    71
  /* read */
d83a1eeff9d2 more operations;
wenzelm
parents: 62248
diff changeset
    72
63645
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    73
  private def read(url: URL, gzip: Boolean): String =
65069
wenzelm
parents: 64777
diff changeset
    74
    using(url.openStream)(stream =>
wenzelm
parents: 64777
diff changeset
    75
      File.read_stream(if (gzip) new GZIPInputStream(stream) else stream))
63642
d83a1eeff9d2 more operations;
wenzelm
parents: 62248
diff changeset
    76
63645
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    77
  def read(url: URL): String = read(url, false)
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    78
  def read_gzip(url: URL): String = read(url, true)
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    79
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    80
  def read(name: String): String = read(Url(name), false)
d7e0004d4321 more operations;
wenzelm
parents: 63642
diff changeset
    81
  def read_gzip(name: String): String = read(Url(name), true)
64729
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    82
69394
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    83
  def read_bytes(url: URL): Bytes =
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    84
  {
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    85
    val connection = url.openConnection
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    86
    val length = connection.getContentLength
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    87
    using(connection.getInputStream)(Bytes.read_stream(_, hint = length))
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    88
  }
f3240f3aa698 more operations;
wenzelm
parents: 67245
diff changeset
    89
64729
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    90
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    91
  /* file URIs */
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    92
66234
836898197296 clarified platform file operations;
wenzelm
parents: 65188
diff changeset
    93
  def print_file(file: JFile): String = File.absolute(file).toPath.toUri.toString
64777
ca09695eb43c clarified Document.Node.Name (again): canonical platform file;
wenzelm
parents: 64775
diff changeset
    94
  def print_file_name(name: String): String = print_file(new JFile(name))
64775
dd3797f1e0d6 clarified file URIs;
wenzelm
parents: 64766
diff changeset
    95
dd3797f1e0d6 clarified file URIs;
wenzelm
parents: 64766
diff changeset
    96
  def parse_file(uri: String): JFile = Paths.get(new URI(uri)).toFile
64729
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    97
4eccd9bc5fd9 clarified file URI operations;
wenzelm
parents: 63645
diff changeset
    98
  def is_wellformed_file(uri: String): Boolean =
64775
dd3797f1e0d6 clarified file URIs;
wenzelm
parents: 64766
diff changeset
    99
    try { parse_file(uri); true }
65188
50cfc6775361 more complete exception handling;
wenzelm
parents: 65069
diff changeset
   100
    catch {
50cfc6775361 more complete exception handling;
wenzelm
parents: 65069
diff changeset
   101
      case _: URISyntaxException | _: IllegalArgumentException | _: FileSystemNotFoundException =>
50cfc6775361 more complete exception handling;
wenzelm
parents: 65069
diff changeset
   102
        false
50cfc6775361 more complete exception handling;
wenzelm
parents: 65069
diff changeset
   103
    }
64730
76996d915894 clarified modules;
wenzelm
parents: 64729
diff changeset
   104
66235
d4fa51e7c4ff retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents: 66234
diff changeset
   105
  def absolute_file(uri: String): JFile = File.absolute(parse_file(uri))
d4fa51e7c4ff retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents: 66234
diff changeset
   106
  def absolute_file_name(uri: String): String = absolute_file(uri).getPath
d4fa51e7c4ff retain symlinks in file names from VSCode: relevant for proper file locations in decorations etc.;
wenzelm
parents: 66234
diff changeset
   107
66234
836898197296 clarified platform file operations;
wenzelm
parents: 65188
diff changeset
   108
  def canonical_file(uri: String): JFile = File.canonical(parse_file(uri))
64777
ca09695eb43c clarified Document.Node.Name (again): canonical platform file;
wenzelm
parents: 64775
diff changeset
   109
  def canonical_file_name(uri: String): String = canonical_file(uri).getPath
56501
5fda9e5c5874 basic URL operations (with Isabelle/Scala error handling);
wenzelm
parents:
diff changeset
   110
}