1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
use std::convert::Into;
use util::{as_char, StrCharIndex};
use types::*;
/// Static description of a single-byte encoding: its names plus the two
/// index functions from which its encoder and decoder are built.
#[derive(Clone, Copy)]
pub struct SingleByteEncoding {
    /// Name returned by `Encoding::name`.
    pub name: &'static str,
    /// Name returned by `Encoding::whatwg_name`, if there is one.
    pub whatwg_name: Option<&'static str>,
    /// Byte-to-code-point lookup handed to `SingleByteDecoder`, which only
    /// consults it for bytes above `0x7f` and treats `0xffff` as "no mapping".
    pub index_forward: fn(u8) -> u16,
    /// Code-point-to-byte lookup handed to `SingleByteEncoder`, which only
    /// consults it for characters above U+007F and treats `0` as "no mapping".
    pub index_backward: fn(u32) -> u8,
}
/// Reports the stored names and builds boxed encoder/decoder instances from
/// the stored index functions.
impl Encoding for SingleByteEncoding {
    /// Returns the `name` field.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the `whatwg_name` field.
    fn whatwg_name(&self) -> Option<&'static str> {
        self.whatwg_name
    }

    /// Creates a fresh encoder driven by `index_backward`.
    fn raw_encoder(&self) -> Box<RawEncoder> {
        let backward = self.index_backward;
        SingleByteEncoder::new(backward)
    }

    /// Creates a fresh decoder driven by `index_forward`.
    fn raw_decoder(&self) -> Box<RawDecoder> {
        let forward = self.index_forward;
        SingleByteDecoder::new(forward)
    }
}
/// Encoder for a single-byte encoding; its only state is the lookup
/// function used for characters above U+007F.
#[derive(Copy, Clone)]
pub struct SingleByteEncoder {
    /// Code-point-to-byte lookup; a result of `0` is treated by `raw_feed`
    /// as "unrepresentable".
    index_backward: fn(u32) -> u8,
}
impl SingleByteEncoder {
    /// Builds an encoder around `index_backward` and returns it boxed as a
    /// `RawEncoder` trait object.
    pub fn new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder> {
        let encoder = SingleByteEncoder { index_backward: index_backward };
        Box::new(encoder)
    }
}
impl RawEncoder for SingleByteEncoder {
    /// Returns a new boxed encoder that uses the same lookup function.
    fn from_self(&self) -> Box<RawEncoder> {
        SingleByteEncoder::new(self.index_backward)
    }

    /// Always `true`: characters up to U+007F are emitted as the identical byte.
    fn is_ascii_compatible(&self) -> bool {
        true
    }

    /// Encodes `input` into `output` one character at a time.
    ///
    /// Returns `(input.len(), None)` when every character was written. On the
    /// first character above U+007F whose lookup yields `0`, stops and returns
    /// the first index of that character's `index_iter` pair together with a
    /// `CodecError` whose `upto` is the second index of the pair.
    fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
        output.writer_hint(input.len());

        for ((start, end), ch) in input.index_iter() {
            if ch > '\u{7f}' {
                let byte = (self.index_backward)(ch as u32);
                // A zero byte from the lookup means "no mapping"; U+0000
                // itself never reaches the lookup because it takes the
                // ASCII branch below.
                if byte == 0 {
                    return (start, Some(CodecError {
                        upto: end as isize, cause: "unrepresentable character".into()
                    }));
                }
                output.write_byte(byte);
            } else {
                output.write_byte(ch as u8);
            }
        }

        (input.len(), None)
    }

    /// Nothing is buffered between calls, so finishing never fails.
    fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
        None
    }
}
/// Decoder for a single-byte encoding; its only state is the lookup
/// function used for bytes above `0x7f`.
#[derive(Copy, Clone)]
pub struct SingleByteDecoder {
    /// Byte-to-code-point lookup; a result of `0xffff` is treated by
    /// `raw_feed` as "invalid".
    index_forward: fn(u8) -> u16,
}
impl SingleByteDecoder {
    /// Builds a decoder around `index_forward` and returns it boxed as a
    /// `RawDecoder` trait object.
    pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder> {
        let decoder = SingleByteDecoder { index_forward: index_forward };
        Box::new(decoder)
    }
}
impl RawDecoder for SingleByteDecoder {
    /// Returns a new boxed decoder that uses the same lookup function.
    fn from_self(&self) -> Box<RawDecoder> {
        SingleByteDecoder::new(self.index_forward)
    }

    /// Always `true`: bytes up to `0x7f` decode to the character of the same value.
    fn is_ascii_compatible(&self) -> bool {
        true
    }

    /// Decodes `input` into `output` one byte at a time.
    ///
    /// Returns `(input.len(), None)` when every byte was decoded. On the first
    /// byte above `0x7f` whose lookup yields `0xffff`, stops and returns that
    /// byte's offset together with a `CodecError` whose `upto` is the offset
    /// just past it.
    fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
        output.writer_hint(input.len());

        for (pos, &byte) in input.iter().enumerate() {
            if byte > 0x7f {
                let code = (self.index_forward)(byte);
                // 0xffff is the lookup's "no mapping" marker.
                if code == 0xffff {
                    return (pos, Some(CodecError {
                        upto: pos as isize + 1, cause: "invalid sequence".into()
                    }));
                }
                output.write_char(as_char(code as u32));
            } else {
                output.write_char(byte as char);
            }
        }

        (input.len(), None)
    }

    /// Nothing is buffered between calls, so finishing never fails.
    fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
        None
    }
}
/// Index functions for ISO-8859-1, where each byte and its code point share
/// the same numeric value.
pub mod iso_8859_1 {
    /// Forward index: widens the byte to the code point of the same value.
    #[inline]
    pub fn forward(code: u8) -> u16 {
        u16::from(code)
    }

    /// Backward index: code points from `0x80` through `0xff` map to the byte
    /// of the same value; every other input (including values below `0x80`)
    /// yields `0`.
    #[inline]
    pub fn backward(code: u32) -> u8 {
        if code >= 0x80 && code <= 0xff {
            code as u8
        } else {
            0
        }
    }
}
// Unit tests for the single-byte encoder, compiled only for test builds.
#[cfg(test)]
mod tests {
use all::ISO_8859_2;
use types::*;
// Feeds the ISO-8859-2 encoder the characters U+FFFF (last code point of the
// BMP) and U+10000 (first code point beyond it) and expects an error for each.
// NOTE(review): `assert_feed_err!` is defined outside this chunk; its
// arguments are presumably (encoder, processed input, offending input,
// remaining input, expected output bytes) — confirm against the macro.
#[test]
fn test_encoder_non_bmp() {
let mut e = ISO_8859_2.raw_encoder();
// In both cases only the leading "A" (0x41) is expected in the output.
assert_feed_err!(e, "A", "\u{FFFF}", "B", [0x41]);
assert_feed_err!(e, "A", "\u{10000}", "B", [0x41]);
}
}