src/Tools/jEdit/src/isabelle_encoding.scala
author wenzelm
Tue, 24 May 2016 16:24:20 +0200
changeset 63140 0644c2e5a989
parent 62527 aae9a2a855e0
child 66457 9098c36abd1a
permissions -rw-r--r--
updated;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
43282
5d294220ca43 moved sources -- eliminated Netbeans artifact of jedit package directory;
wenzelm
parents: 37175
diff changeset
     1
/*  Title:      Tools/jEdit/src/isabelle_encoding.scala
36760
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     2
    Author:     Makarius
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     3
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     4
Isabelle encoding -- based on UTF-8.
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     5
*/
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     6
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     7
package isabelle.jedit
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     8
34760
dc7f5e0d9d27 misc modernization of names;
wenzelm
parents: 34759
diff changeset
     9
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    10
import isabelle._
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    11
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    12
import org.gjt.sp.jedit.io.Encoding
34625
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    13
import org.gjt.sp.jedit.buffer.JEditBuffer
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    14
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    15
import java.nio.charset.{Charset, CodingErrorAction}
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    16
import java.io.{InputStream, OutputStream, Reader, Writer, InputStreamReader, OutputStreamWriter,
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    17
  CharArrayReader, ByteArrayOutputStream}
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    18
48277
f14e564fca1a tuned imports;
wenzelm
parents: 43695
diff changeset
    19
import scala.io.{Codec, BufferedSource}
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    20
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    21
34738
80408ffc84a8 tuned file name;
wenzelm
parents: 34625
diff changeset
    22
object Isabelle_Encoding
34624
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    23
{
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    24
  val NAME = "UTF-8-Isabelle"
34625
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    25
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    26
  def is_active(buffer: JEditBuffer): Boolean =
34793
wenzelm
parents: 34777
diff changeset
    27
    buffer.getStringProperty(JEditBuffer.ENCODING).asInstanceOf[String] == NAME
62104
fb73c0d7bb37 clarified symbol insertion, depending on buffer encoding;
wenzelm
parents: 50203
diff changeset
    28
fb73c0d7bb37 clarified symbol insertion, depending on buffer encoding;
wenzelm
parents: 50203
diff changeset
    29
  def maybe_decode(buffer: JEditBuffer, s: String): String =
fb73c0d7bb37 clarified symbol insertion, depending on buffer encoding;
wenzelm
parents: 50203
diff changeset
    30
    if (is_active(buffer)) Symbol.decode(s) else s
34624
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    31
}
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    32
34738
80408ffc84a8 tuned file name;
wenzelm
parents: 34625
diff changeset
    33
class Isabelle_Encoding extends Encoding
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    34
{
43516
1c4736b9396a prefer actual charset over charset name;
wenzelm
parents: 43282
diff changeset
    35
  private val BUFSIZE = 32768
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    36
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    37
  private def text_reader(in: InputStream, codec: Codec): Reader =
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    38
  {
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    39
    val source = new BufferedSource(in)(codec)
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43661
diff changeset
    40
    new CharArrayReader(Symbol.decode(source.mkString).toArray)
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    41
  }
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    42
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    43
  override def getTextReader(in: InputStream): Reader = text_reader(in, UTF8.codec())
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    44
37175
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    45
  override def getPermissiveTextReader(in: InputStream): Reader =
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    46
  {
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    47
    val codec = UTF8.codec()
37175
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    48
    codec.onMalformedInput(CodingErrorAction.REPLACE)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    49
    codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    50
    text_reader(in, codec)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    51
  }
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    52
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    53
  override def getTextWriter(out: OutputStream): Writer =
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    54
  {
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    55
    val buffer = new ByteArrayOutputStream(BUFSIZE) {
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    56
      override def flush()
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    57
      {
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    58
        val text = Symbol.encode(toString(UTF8.charset_name))
62527
aae9a2a855e0 tuned signature;
wenzelm
parents: 62104
diff changeset
    59
        out.write(UTF8.bytes(text))
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    60
        out.flush()
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    61
      }
34621
6cba4b3723e4 more precise wrapping of I/O streams;
wenzelm
parents: 34620
diff changeset
    62
      override def close() { out.close() }
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    63
    }
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    64
    new OutputStreamWriter(buffer, UTF8.charset.newEncoder())
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    65
  }
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    66
}