src/Tools/jEdit/src/isabelle_encoding.scala
author wenzelm
Wed, 30 Sep 2015 20:48:59 +0200
changeset 61292 ca76026ed7cc
parent 50203 00d8ad713e32
child 62104 fb73c0d7bb37
permissions -rw-r--r--
tuned GUI;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
43282
5d294220ca43 moved sources -- eliminated Netbeans artifact of jedit package directory;
wenzelm
parents: 37175
diff changeset
     1
/*  Title:      Tools/jEdit/src/isabelle_encoding.scala
36760
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     2
    Author:     Makarius
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     3
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     4
Isabelle encoding -- based on UTF-8.
b82a698ef6c9 tuned headers;
wenzelm
parents: 36015
diff changeset
     5
*/
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     6
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     7
package isabelle.jedit
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
     8
34760
dc7f5e0d9d27 misc modernization of names;
wenzelm
parents: 34759
diff changeset
     9
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    10
import isabelle._
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    11
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    12
import org.gjt.sp.jedit.io.Encoding
34625
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    13
import org.gjt.sp.jedit.buffer.JEditBuffer
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    14
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    15
import java.nio.charset.{Charset, CodingErrorAction}
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    16
import java.io.{InputStream, OutputStream, Reader, Writer, InputStreamReader, OutputStreamWriter,
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    17
  CharArrayReader, ByteArrayOutputStream}
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    18
48277
f14e564fca1a tuned imports;
wenzelm
parents: 43695
diff changeset
    19
import scala.io.{Codec, BufferedSource}
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    20
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    21
34738
80408ffc84a8 tuned file name;
wenzelm
parents: 34625
diff changeset
    22
object Isabelle_Encoding
34624
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    23
{
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    24
  val NAME = "UTF-8-Isabelle"
34625
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    25
799a40faa4f1 completion: decode symbols only if isabelle encoding is active (to prevent unicode chars written back to file);
wenzelm
parents: 34624
diff changeset
    26
  def is_active(buffer: JEditBuffer): Boolean =
34793
wenzelm
parents: 34777
diff changeset
    27
    buffer.getStringProperty(JEditBuffer.ENCODING).asInstanceOf[String] == NAME
34624
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    28
}
5e4f33d033ba decentralized Isabelle component names;
wenzelm
parents: 34621
diff changeset
    29
34738
80408ffc84a8 tuned file name;
wenzelm
parents: 34625
diff changeset
    30
class Isabelle_Encoding extends Encoding
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    31
{
43516
1c4736b9396a prefer actual charset over charset name;
wenzelm
parents: 43282
diff changeset
    32
  private val BUFSIZE = 32768
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    33
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    34
  private def text_reader(in: InputStream, codec: Codec): Reader =
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    35
  {
36015
6111de7c916a adapted to Scala 2.8.0 Beta 1;
wenzelm
parents: 34805
diff changeset
    36
    val source = new BufferedSource(in)(codec)
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43661
diff changeset
    37
    new CharArrayReader(Symbol.decode(source.mkString).toArray)
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    38
  }
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    39
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    40
  override def getTextReader(in: InputStream): Reader = text_reader(in, UTF8.codec())
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    41
37175
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    42
  override def getPermissiveTextReader(in: InputStream): Reader =
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    43
  {
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    44
    val codec = UTF8.codec()
37175
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    45
    codec.onMalformedInput(CodingErrorAction.REPLACE)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    46
    codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    47
    text_reader(in, codec)
be764a7adb10 eliminated hard tabs;
wenzelm
parents: 36760
diff changeset
    48
  }
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    49
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    50
  override def getTextWriter(out: OutputStream): Writer =
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    51
  {
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    52
    val buffer = new ByteArrayOutputStream(BUFSIZE) {
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    53
      override def flush()
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    54
      {
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    55
        val text = Symbol.encode(toString(UTF8.charset_name))
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    56
        out.write(text.getBytes(UTF8.charset))
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    57
        out.flush()
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    58
      }
34621
6cba4b3723e4 more precise wrapping of I/O streams;
wenzelm
parents: 34620
diff changeset
    59
      override def close() { out.close() }
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    60
    }
50203
00d8ad713e32 explicit module UTF8;
wenzelm
parents: 48277
diff changeset
    61
    new OutputStreamWriter(buffer, UTF8.charset.newEncoder())
34620
5328c64f9866 some support for actual symbol recoding;
wenzelm
parents: 34619
diff changeset
    62
  }
34619
e89b6ec97910 added IsabelleEncoding -- a clone of utf-8 for now;
wenzelm
parents:
diff changeset
    63
}