enc-jpn.lisp - clic - Clic is an command line interactive client for gopher wri… | |
git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
LICENSE | |
--- | |
enc-jpn.lisp (37967B) | |
--- | |
1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*- | |
2 ;;; | |
3 ;;; enc-jpn.lisp --- Japanese encodings. | |
4 ;;; | |
5 | |
6 (in-package #:babel-encodings) | |
7 | |
8 ;;;; helper functions | |
9 (defvar *eucjp-to-ucs-hash* (make-hash-table)) | |
10 (defvar *ucs-to-eucjp-hash* (make-hash-table)) | |
11 (defvar *cp932-to-ucs-hash* (make-hash-table)) | |
12 (defvar *ucs-to-cp932-hash* (make-hash-table)) | |
13 | |
14 (dolist (i `((,*cp932-only* | |
15 ,*cp932-to-ucs-hash* | |
16 ,*ucs-to-cp932-hash*) | |
17 (,*eucjp-only* | |
18 ,*eucjp-to-ucs-hash* | |
19 ,*ucs-to-eucjp-hash*) | |
20 (,*eucjp* | |
21 ,*eucjp-to-ucs-hash* | |
22 ,*ucs-to-eucjp-hash*))) | |
23 (dolist (j (first i)) | |
24 (setf (gethash (car j) (second i)) (cadr j)) | |
25 (setf (gethash (cadr j) (third i)) (car j)))) | |
26 | |
27 (flet ((euc-cp932 (x) | |
28 (let ((high (ash x -16)) | |
29 (mid (logand (ash x -8) 255)) | |
30 (low (logand x 255))) | |
31 (cond ((not (zerop high)) | |
32 nil) | |
33 ((= mid #x8e) | |
34 (logand x 255)) | |
35 ((zerop mid) | |
36 x) | |
37 ((decf mid #xa1) | |
38 (decf low #x80) | |
39 (incf low (if (zerop (logand mid 1)) #x1f #x7e)) | |
40 (incf low (if (<= #x7f low #x9d) 1 0)) | |
41 (setq mid (ash mid -1)) | |
42 (incf mid (if (<= mid #x1e) #x81 #xc1)) | |
43 (+ (ash mid 8) low)))))) | |
44 (dolist (i *eucjp*) | |
45 (let ((cp932 (euc-cp932 (first i)))) | |
46 (when cp932 | |
47 (setf (gethash cp932 *cp932-to-ucs-hash*) (second i)) | |
48 (setf (gethash (second i) *ucs-to-cp932-hash*) cp932))))) | |
49 | |
50 ;ascii | |
51 (loop for i from #x00 to #x7f do | |
52 (setf (gethash i *cp932-to-ucs-hash*) i) | |
53 (setf (gethash i *eucjp-to-ucs-hash*) i) | |
54 (setf (gethash i *ucs-to-eucjp-hash*) i) | |
55 (setf (gethash i *ucs-to-cp932-hash*) i)) | |
56 | |
57 ;half-width katakana | |
58 (loop for i from #xa1 to #xdf do | |
59 (setf (gethash i *cp932-to-ucs-hash*) (+ #xff61 #x-a1 i)) | |
60 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-cp932-hash*) i) | |
61 (setf (gethash (+ #x8e00 i) *eucjp-to-ucs-hash*) (+ #xff61 #x-a1 i… | |
62 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-eucjp-hash*) (+ #x8e00 i… | |
63 | |
64 ;; This is quoted from https://support.microsoft.com/en-us/kb/170559/en-… | |
65 (let ((kb170559 "0x8790 -> U+2252 -> 0x81e0 Approximately Equal To… | |
66 0x8791 -> U+2261 -> 0x81df Identical To | |
67 0x8792 -> U+222b -> 0x81e7 Integral | |
68 0x8795 -> U+221a -> 0x81e3 Square Root | |
69 0x8796 -> U+22a5 -> 0x81db Up Tack | |
70 0x8797 -> U+2220 -> 0x81da Angle | |
71 0x879a -> U+2235 -> 0x81e6 Because | |
72 0x879b -> U+2229 -> 0x81bf Intersection | |
73 0x879c -> U+222a -> 0x81be Union | |
74 0xed40 -> U+7e8a -> 0xfa5c CJK Unified Ideograph | |
75 0xed41 -> U+891c -> 0xfa5d CJK Unified Ideograph | |
76 0xed42 -> U+9348 -> 0xfa5e CJK Unified Ideograph | |
77 0xed43 -> U+9288 -> 0xfa5f CJK Unified Ideograph | |
78 0xed44 -> U+84dc -> 0xfa60 CJK Unified Ideograph | |
79 0xed45 -> U+4fc9 -> 0xfa61 CJK Unified Ideograph | |
80 0xed46 -> U+70bb -> 0xfa62 CJK Unified Ideograph | |
81 0xed47 -> U+6631 -> 0xfa63 CJK Unified Ideograph | |
82 0xed48 -> U+68c8 -> 0xfa64 CJK Unified Ideograph | |
83 0xed49 -> U+92f9 -> 0xfa65 CJK Unified Ideograph | |
84 0xed4a -> U+66fb -> 0xfa66 CJK Unified Ideograph | |
85 0xed4b -> U+5f45 -> 0xfa67 CJK Unified Ideograph | |
86 0xed4c -> U+4e28 -> 0xfa68 CJK Unified Ideograph | |
87 0xed4d -> U+4ee1 -> 0xfa69 CJK Unified Ideograph | |
88 0xed4e -> U+4efc -> 0xfa6a CJK Unified Ideograph | |
89 0xed4f -> U+4f00 -> 0xfa6b CJK Unified Ideograph | |
90 0xed50 -> U+4f03 -> 0xfa6c CJK Unified Ideograph | |
91 0xed51 -> U+4f39 -> 0xfa6d CJK Unified Ideograph | |
92 0xed52 -> U+4f56 -> 0xfa6e CJK Unified Ideograph | |
93 0xed53 -> U+4f92 -> 0xfa6f CJK Unified Ideograph | |
94 0xed54 -> U+4f8a -> 0xfa70 CJK Unified Ideograph | |
95 0xed55 -> U+4f9a -> 0xfa71 CJK Unified Ideograph | |
96 0xed56 -> U+4f94 -> 0xfa72 CJK Unified Ideograph | |
97 0xed57 -> U+4fcd -> 0xfa73 CJK Unified Ideograph | |
98 0xed58 -> U+5040 -> 0xfa74 CJK Unified Ideograph | |
99 0xed59 -> U+5022 -> 0xfa75 CJK Unified Ideograph | |
100 0xed5a -> U+4fff -> 0xfa76 CJK Unified Ideograph | |
101 0xed5b -> U+501e -> 0xfa77 CJK Unified Ideograph | |
102 0xed5c -> U+5046 -> 0xfa78 CJK Unified Ideograph | |
103 0xed5d -> U+5070 -> 0xfa79 CJK Unified Ideograph | |
104 0xed5e -> U+5042 -> 0xfa7a CJK Unified Ideograph | |
105 0xed5f -> U+5094 -> 0xfa7b CJK Unified Ideograph | |
106 0xed60 -> U+50f4 -> 0xfa7c CJK Unified Ideograph | |
107 0xed61 -> U+50d8 -> 0xfa7d CJK Unified Ideograph | |
108 0xed62 -> U+514a -> 0xfa7e CJK Unified Ideograph | |
109 0xed63 -> U+5164 -> 0xfa80 CJK Unified Ideograph | |
110 0xed64 -> U+519d -> 0xfa81 CJK Unified Ideograph | |
111 0xed65 -> U+51be -> 0xfa82 CJK Unified Ideograph | |
112 0xed66 -> U+51ec -> 0xfa83 CJK Unified Ideograph | |
113 0xed67 -> U+5215 -> 0xfa84 CJK Unified Ideograph | |
114 0xed68 -> U+529c -> 0xfa85 CJK Unified Ideograph | |
115 0xed69 -> U+52a6 -> 0xfa86 CJK Unified Ideograph | |
116 0xed6a -> U+52c0 -> 0xfa87 CJK Unified Ideograph | |
117 0xed6b -> U+52db -> 0xfa88 CJK Unified Ideograph | |
118 0xed6c -> U+5300 -> 0xfa89 CJK Unified Ideograph | |
119 0xed6d -> U+5307 -> 0xfa8a CJK Unified Ideograph | |
120 0xed6e -> U+5324 -> 0xfa8b CJK Unified Ideograph | |
121 0xed6f -> U+5372 -> 0xfa8c CJK Unified Ideograph | |
122 0xed70 -> U+5393 -> 0xfa8d CJK Unified Ideograph | |
123 0xed71 -> U+53b2 -> 0xfa8e CJK Unified Ideograph | |
124 0xed72 -> U+53dd -> 0xfa8f CJK Unified Ideograph | |
125 0xed73 -> U+fa0e -> 0xfa90 CJK compatibility Ideograph | |
126 0xed74 -> U+549c -> 0xfa91 CJK Unified Ideograph | |
127 0xed75 -> U+548a -> 0xfa92 CJK Unified Ideograph | |
128 0xed76 -> U+54a9 -> 0xfa93 CJK Unified Ideograph | |
129 0xed77 -> U+54ff -> 0xfa94 CJK Unified Ideograph | |
130 0xed78 -> U+5586 -> 0xfa95 CJK Unified Ideograph | |
131 0xed79 -> U+5759 -> 0xfa96 CJK Unified Ideograph | |
132 0xed7a -> U+5765 -> 0xfa97 CJK Unified Ideograph | |
133 0xed7b -> U+57ac -> 0xfa98 CJK Unified Ideograph | |
134 0xed7c -> U+57c8 -> 0xfa99 CJK Unified Ideograph | |
135 0xed7d -> U+57c7 -> 0xfa9a CJK Unified Ideograph | |
136 0xed7e -> U+fa0f -> 0xfa9b CJK compatibility Ideograph | |
137 0xed80 -> U+fa10 -> 0xfa9c CJK compatibility Ideograph | |
138 0xed81 -> U+589e -> 0xfa9d CJK Unified Ideograph | |
139 0xed82 -> U+58b2 -> 0xfa9e CJK Unified Ideograph | |
140 0xed83 -> U+590b -> 0xfa9f CJK Unified Ideograph | |
141 0xed84 -> U+5953 -> 0xfaa0 CJK Unified Ideograph | |
142 0xed85 -> U+595b -> 0xfaa1 CJK Unified Ideograph | |
143 0xed86 -> U+595d -> 0xfaa2 CJK Unified Ideograph | |
144 0xed87 -> U+5963 -> 0xfaa3 CJK Unified Ideograph | |
145 0xed88 -> U+59a4 -> 0xfaa4 CJK Unified Ideograph | |
146 0xed89 -> U+59ba -> 0xfaa5 CJK Unified Ideograph | |
147 0xed8a -> U+5b56 -> 0xfaa6 CJK Unified Ideograph | |
148 0xed8b -> U+5bc0 -> 0xfaa7 CJK Unified Ideograph | |
149 0xed8c -> U+752f -> 0xfaa8 CJK Unified Ideograph | |
150 0xed8d -> U+5bd8 -> 0xfaa9 CJK Unified Ideograph | |
151 0xed8e -> U+5bec -> 0xfaaa CJK Unified Ideograph | |
152 0xed8f -> U+5c1e -> 0xfaab CJK Unified Ideograph | |
153 0xed90 -> U+5ca6 -> 0xfaac CJK Unified Ideograph | |
154 0xed91 -> U+5cba -> 0xfaad CJK Unified Ideograph | |
155 0xed92 -> U+5cf5 -> 0xfaae CJK Unified Ideograph | |
156 0xed93 -> U+5d27 -> 0xfaaf CJK Unified Ideograph | |
157 0xed94 -> U+5d53 -> 0xfab0 CJK Unified Ideograph | |
158 0xed95 -> U+fa11 -> 0xfab1 CJK compatibility Ideograph | |
159 0xed96 -> U+5d42 -> 0xfab2 CJK Unified Ideograph | |
160 0xed97 -> U+5d6d -> 0xfab3 CJK Unified Ideograph | |
161 0xed98 -> U+5db8 -> 0xfab4 CJK Unified Ideograph | |
162 0xed99 -> U+5db9 -> 0xfab5 CJK Unified Ideograph | |
163 0xed9a -> U+5dd0 -> 0xfab6 CJK Unified Ideograph | |
164 0xed9b -> U+5f21 -> 0xfab7 CJK Unified Ideograph | |
165 0xed9c -> U+5f34 -> 0xfab8 CJK Unified Ideograph | |
166 0xed9d -> U+5f67 -> 0xfab9 CJK Unified Ideograph | |
167 0xed9e -> U+5fb7 -> 0xfaba CJK Unified Ideograph | |
168 0xed9f -> U+5fde -> 0xfabb CJK Unified Ideograph | |
169 0xeda0 -> U+605d -> 0xfabc CJK Unified Ideograph | |
170 0xeda1 -> U+6085 -> 0xfabd CJK Unified Ideograph | |
171 0xeda2 -> U+608a -> 0xfabe CJK Unified Ideograph | |
172 0xeda3 -> U+60de -> 0xfabf CJK Unified Ideograph | |
173 0xeda4 -> U+60d5 -> 0xfac0 CJK Unified Ideograph | |
174 0xeda5 -> U+6120 -> 0xfac1 CJK Unified Ideograph | |
175 0xeda6 -> U+60f2 -> 0xfac2 CJK Unified Ideograph | |
176 0xeda7 -> U+6111 -> 0xfac3 CJK Unified Ideograph | |
177 0xeda8 -> U+6137 -> 0xfac4 CJK Unified Ideograph | |
178 0xeda9 -> U+6130 -> 0xfac5 CJK Unified Ideograph | |
179 0xedaa -> U+6198 -> 0xfac6 CJK Unified Ideograph | |
180 0xedab -> U+6213 -> 0xfac7 CJK Unified Ideograph | |
181 0xedac -> U+62a6 -> 0xfac8 CJK Unified Ideograph | |
182 0xedad -> U+63f5 -> 0xfac9 CJK Unified Ideograph | |
183 0xedae -> U+6460 -> 0xfaca CJK Unified Ideograph | |
184 0xedaf -> U+649d -> 0xfacb CJK Unified Ideograph | |
185 0xedb0 -> U+64ce -> 0xfacc CJK Unified Ideograph | |
186 0xedb1 -> U+654e -> 0xfacd CJK Unified Ideograph | |
187 0xedb2 -> U+6600 -> 0xface CJK Unified Ideograph | |
188 0xedb3 -> U+6615 -> 0xfacf CJK Unified Ideograph | |
189 0xedb4 -> U+663b -> 0xfad0 CJK Unified Ideograph | |
190 0xedb5 -> U+6609 -> 0xfad1 CJK Unified Ideograph | |
191 0xedb6 -> U+662e -> 0xfad2 CJK Unified Ideograph | |
192 0xedb7 -> U+661e -> 0xfad3 CJK Unified Ideograph | |
193 0xedb8 -> U+6624 -> 0xfad4 CJK Unified Ideograph | |
194 0xedb9 -> U+6665 -> 0xfad5 CJK Unified Ideograph | |
195 0xedba -> U+6657 -> 0xfad6 CJK Unified Ideograph | |
196 0xedbb -> U+6659 -> 0xfad7 CJK Unified Ideograph | |
197 0xedbc -> U+fa12 -> 0xfad8 CJK compatibility Ideograph | |
198 0xedbd -> U+6673 -> 0xfad9 CJK Unified Ideograph | |
199 0xedbe -> U+6699 -> 0xfada CJK Unified Ideograph | |
200 0xedbf -> U+66a0 -> 0xfadb CJK Unified Ideograph | |
201 0xedc0 -> U+66b2 -> 0xfadc CJK Unified Ideograph | |
202 0xedc1 -> U+66bf -> 0xfadd CJK Unified Ideograph | |
203 0xedc2 -> U+66fa -> 0xfade CJK Unified Ideograph | |
204 0xedc3 -> U+670e -> 0xfadf CJK Unified Ideograph | |
205 0xedc4 -> U+f929 -> 0xfae0 CJK compatibility Ideograph | |
206 0xedc5 -> U+6766 -> 0xfae1 CJK Unified Ideograph | |
207 0xedc6 -> U+67bb -> 0xfae2 CJK Unified Ideograph | |
208 0xedc7 -> U+6852 -> 0xfae3 CJK Unified Ideograph | |
209 0xedc8 -> U+67c0 -> 0xfae4 CJK Unified Ideograph | |
210 0xedc9 -> U+6801 -> 0xfae5 CJK Unified Ideograph | |
211 0xedca -> U+6844 -> 0xfae6 CJK Unified Ideograph | |
212 0xedcb -> U+68cf -> 0xfae7 CJK Unified Ideograph | |
213 0xedcc -> U+fa13 -> 0xfae8 CJK compatibility Ideograph | |
214 0xedcd -> U+6968 -> 0xfae9 CJK Unified Ideograph | |
215 0xedce -> U+fa14 -> 0xfaea CJK compatibility Ideograph | |
216 0xedcf -> U+6998 -> 0xfaeb CJK Unified Ideograph | |
217 0xedd0 -> U+69e2 -> 0xfaec CJK Unified Ideograph | |
218 0xedd1 -> U+6a30 -> 0xfaed CJK Unified Ideograph | |
219 0xedd2 -> U+6a6b -> 0xfaee CJK Unified Ideograph | |
220 0xedd3 -> U+6a46 -> 0xfaef CJK Unified Ideograph | |
221 0xedd4 -> U+6a73 -> 0xfaf0 CJK Unified Ideograph | |
222 0xedd5 -> U+6a7e -> 0xfaf1 CJK Unified Ideograph | |
223 0xedd6 -> U+6ae2 -> 0xfaf2 CJK Unified Ideograph | |
224 0xedd7 -> U+6ae4 -> 0xfaf3 CJK Unified Ideograph | |
225 0xedd8 -> U+6bd6 -> 0xfaf4 CJK Unified Ideograph | |
226 0xedd9 -> U+6c3f -> 0xfaf5 CJK Unified Ideograph | |
227 0xedda -> U+6c5c -> 0xfaf6 CJK Unified Ideograph | |
228 0xeddb -> U+6c86 -> 0xfaf7 CJK Unified Ideograph | |
229 0xeddc -> U+6c6f -> 0xfaf8 CJK Unified Ideograph | |
230 0xeddd -> U+6cda -> 0xfaf9 CJK Unified Ideograph | |
231 0xedde -> U+6d04 -> 0xfafa CJK Unified Ideograph | |
232 0xeddf -> U+6d87 -> 0xfafb CJK Unified Ideograph | |
233 0xede0 -> U+6d6f -> 0xfafc CJK Unified Ideograph | |
234 0xede1 -> U+6d96 -> 0xfb40 CJK Unified Ideograph | |
235 0xede2 -> U+6dac -> 0xfb41 CJK Unified Ideograph | |
236 0xede3 -> U+6dcf -> 0xfb42 CJK Unified Ideograph | |
237 0xede4 -> U+6df8 -> 0xfb43 CJK Unified Ideograph | |
238 0xede5 -> U+6df2 -> 0xfb44 CJK Unified Ideograph | |
239 0xede6 -> U+6dfc -> 0xfb45 CJK Unified Ideograph | |
240 0xede7 -> U+6e39 -> 0xfb46 CJK Unified Ideograph | |
241 0xede8 -> U+6e5c -> 0xfb47 CJK Unified Ideograph | |
242 0xede9 -> U+6e27 -> 0xfb48 CJK Unified Ideograph | |
243 0xedea -> U+6e3c -> 0xfb49 CJK Unified Ideograph | |
244 0xedeb -> U+6ebf -> 0xfb4a CJK Unified Ideograph | |
245 0xedec -> U+6f88 -> 0xfb4b CJK Unified Ideograph | |
246 0xeded -> U+6fb5 -> 0xfb4c CJK Unified Ideograph | |
247 0xedee -> U+6ff5 -> 0xfb4d CJK Unified Ideograph | |
248 0xedef -> U+7005 -> 0xfb4e CJK Unified Ideograph | |
249 0xedf0 -> U+7007 -> 0xfb4f CJK Unified Ideograph | |
250 0xedf1 -> U+7028 -> 0xfb50 CJK Unified Ideograph | |
251 0xedf2 -> U+7085 -> 0xfb51 CJK Unified Ideograph | |
252 0xedf3 -> U+70ab -> 0xfb52 CJK Unified Ideograph | |
253 0xedf4 -> U+710f -> 0xfb53 CJK Unified Ideograph | |
254 0xedf5 -> U+7104 -> 0xfb54 CJK Unified Ideograph | |
255 0xedf6 -> U+715c -> 0xfb55 CJK Unified Ideograph | |
256 0xedf7 -> U+7146 -> 0xfb56 CJK Unified Ideograph | |
257 0xedf8 -> U+7147 -> 0xfb57 CJK Unified Ideograph | |
258 0xedf9 -> U+fa15 -> 0xfb58 CJK compatibility Ideograph | |
259 0xedfa -> U+71c1 -> 0xfb59 CJK Unified Ideograph | |
260 0xedfb -> U+71fe -> 0xfb5a CJK Unified Ideograph | |
261 0xedfc -> U+72b1 -> 0xfb5b CJK Unified Ideograph | |
262 0xee40 -> U+72be -> 0xfb5c CJK Unified Ideograph | |
263 0xee41 -> U+7324 -> 0xfb5d CJK Unified Ideograph | |
264 0xee42 -> U+fa16 -> 0xfb5e CJK compatibility Ideograph | |
265 0xee43 -> U+7377 -> 0xfb5f CJK Unified Ideograph | |
266 0xee44 -> U+73bd -> 0xfb60 CJK Unified Ideograph | |
267 0xee45 -> U+73c9 -> 0xfb61 CJK Unified Ideograph | |
268 0xee46 -> U+73d6 -> 0xfb62 CJK Unified Ideograph | |
269 0xee47 -> U+73e3 -> 0xfb63 CJK Unified Ideograph | |
270 0xee48 -> U+73d2 -> 0xfb64 CJK Unified Ideograph | |
271 0xee49 -> U+7407 -> 0xfb65 CJK Unified Ideograph | |
272 0xee4a -> U+73f5 -> 0xfb66 CJK Unified Ideograph | |
273 0xee4b -> U+7426 -> 0xfb67 CJK Unified Ideograph | |
274 0xee4c -> U+742a -> 0xfb68 CJK Unified Ideograph | |
275 0xee4d -> U+7429 -> 0xfb69 CJK Unified Ideograph | |
276 0xee4e -> U+742e -> 0xfb6a CJK Unified Ideograph | |
277 0xee4f -> U+7462 -> 0xfb6b CJK Unified Ideograph | |
278 0xee50 -> U+7489 -> 0xfb6c CJK Unified Ideograph | |
279 0xee51 -> U+749f -> 0xfb6d CJK Unified Ideograph | |
280 0xee52 -> U+7501 -> 0xfb6e CJK Unified Ideograph | |
281 0xee53 -> U+756f -> 0xfb6f CJK Unified Ideograph | |
282 0xee54 -> U+7682 -> 0xfb70 CJK Unified Ideograph | |
283 0xee55 -> U+769c -> 0xfb71 CJK Unified Ideograph | |
284 0xee56 -> U+769e -> 0xfb72 CJK Unified Ideograph | |
285 0xee57 -> U+769b -> 0xfb73 CJK Unified Ideograph | |
286 0xee58 -> U+76a6 -> 0xfb74 CJK Unified Ideograph | |
287 0xee59 -> U+fa17 -> 0xfb75 CJK compatibility Ideograph | |
288 0xee5a -> U+7746 -> 0xfb76 CJK Unified Ideograph | |
289 0xee5b -> U+52af -> 0xfb77 CJK Unified Ideograph | |
290 0xee5c -> U+7821 -> 0xfb78 CJK Unified Ideograph | |
291 0xee5d -> U+784e -> 0xfb79 CJK Unified Ideograph | |
292 0xee5e -> U+7864 -> 0xfb7a CJK Unified Ideograph | |
293 0xee5f -> U+787a -> 0xfb7b CJK Unified Ideograph | |
294 0xee60 -> U+7930 -> 0xfb7c CJK Unified Ideograph | |
295 0xee61 -> U+fa18 -> 0xfb7d CJK compatibility Ideograph | |
296 0xee62 -> U+fa19 -> 0xfb7e CJK compatibility Ideograph | |
297 0xee63 -> U+fa1a -> 0xfb80 CJK compatibility Ideograph | |
298 0xee64 -> U+7994 -> 0xfb81 CJK Unified Ideograph | |
299 0xee65 -> U+fa1b -> 0xfb82 CJK compatibility Ideograph | |
300 0xee66 -> U+799b -> 0xfb83 CJK Unified Ideograph | |
301 0xee67 -> U+7ad1 -> 0xfb84 CJK Unified Ideograph | |
302 0xee68 -> U+7ae7 -> 0xfb85 CJK Unified Ideograph | |
303 0xee69 -> U+fa1c -> 0xfb86 CJK compatibility Ideograph | |
304 0xee6a -> U+7aeb -> 0xfb87 CJK Unified Ideograph | |
305 0xee6b -> U+7b9e -> 0xfb88 CJK Unified Ideograph | |
306 0xee6c -> U+fa1d -> 0xfb89 CJK compatibility Ideograph | |
307 0xee6d -> U+7d48 -> 0xfb8a CJK Unified Ideograph | |
308 0xee6e -> U+7d5c -> 0xfb8b CJK Unified Ideograph | |
309 0xee6f -> U+7db7 -> 0xfb8c CJK Unified Ideograph | |
310 0xee70 -> U+7da0 -> 0xfb8d CJK Unified Ideograph | |
311 0xee71 -> U+7dd6 -> 0xfb8e CJK Unified Ideograph | |
312 0xee72 -> U+7e52 -> 0xfb8f CJK Unified Ideograph | |
313 0xee73 -> U+7f47 -> 0xfb90 CJK Unified Ideograph | |
314 0xee74 -> U+7fa1 -> 0xfb91 CJK Unified Ideograph | |
315 0xee75 -> U+fa1e -> 0xfb92 CJK compatibility Ideograph | |
316 0xee76 -> U+8301 -> 0xfb93 CJK Unified Ideograph | |
317 0xee77 -> U+8362 -> 0xfb94 CJK Unified Ideograph | |
318 0xee78 -> U+837f -> 0xfb95 CJK Unified Ideograph | |
319 0xee79 -> U+83c7 -> 0xfb96 CJK Unified Ideograph | |
320 0xee7a -> U+83f6 -> 0xfb97 CJK Unified Ideograph | |
321 0xee7b -> U+8448 -> 0xfb98 CJK Unified Ideograph | |
322 0xee7c -> U+84b4 -> 0xfb99 CJK Unified Ideograph | |
323 0xee7d -> U+8553 -> 0xfb9a CJK Unified Ideograph | |
324 0xee7e -> U+8559 -> 0xfb9b CJK Unified Ideograph | |
325 0xee80 -> U+856b -> 0xfb9c CJK Unified Ideograph | |
326 0xee81 -> U+fa1f -> 0xfb9d CJK compatibility Ideograph | |
327 0xee82 -> U+85b0 -> 0xfb9e CJK Unified Ideograph | |
328 0xee83 -> U+fa20 -> 0xfb9f CJK compatibility Ideograph | |
329 0xee84 -> U+fa21 -> 0xfba0 CJK compatibility Ideograph | |
330 0xee85 -> U+8807 -> 0xfba1 CJK Unified Ideograph | |
331 0xee86 -> U+88f5 -> 0xfba2 CJK Unified Ideograph | |
332 0xee87 -> U+8a12 -> 0xfba3 CJK Unified Ideograph | |
333 0xee88 -> U+8a37 -> 0xfba4 CJK Unified Ideograph | |
334 0xee89 -> U+8a79 -> 0xfba5 CJK Unified Ideograph | |
335 0xee8a -> U+8aa7 -> 0xfba6 CJK Unified Ideograph | |
336 0xee8b -> U+8abe -> 0xfba7 CJK Unified Ideograph | |
337 0xee8c -> U+8adf -> 0xfba8 CJK Unified Ideograph | |
338 0xee8d -> U+fa22 -> 0xfba9 CJK compatibility Ideograph | |
339 0xee8e -> U+8af6 -> 0xfbaa CJK Unified Ideograph | |
340 0xee8f -> U+8b53 -> 0xfbab CJK Unified Ideograph | |
341 0xee90 -> U+8b7f -> 0xfbac CJK Unified Ideograph | |
342 0xee91 -> U+8cf0 -> 0xfbad CJK Unified Ideograph | |
343 0xee92 -> U+8cf4 -> 0xfbae CJK Unified Ideograph | |
344 0xee93 -> U+8d12 -> 0xfbaf CJK Unified Ideograph | |
345 0xee94 -> U+8d76 -> 0xfbb0 CJK Unified Ideograph | |
346 0xee95 -> U+fa23 -> 0xfbb1 CJK compatibility Ideograph | |
347 0xee96 -> U+8ecf -> 0xfbb2 CJK Unified Ideograph | |
348 0xee97 -> U+fa24 -> 0xfbb3 CJK compatibility Ideograph | |
349 0xee98 -> U+fa25 -> 0xfbb4 CJK compatibility Ideograph | |
350 0xee99 -> U+9067 -> 0xfbb5 CJK Unified Ideograph | |
351 0xee9a -> U+90de -> 0xfbb6 CJK Unified Ideograph | |
352 0xee9b -> U+fa26 -> 0xfbb7 CJK compatibility Ideograph | |
353 0xee9c -> U+9115 -> 0xfbb8 CJK Unified Ideograph | |
354 0xee9d -> U+9127 -> 0xfbb9 CJK Unified Ideograph | |
355 0xee9e -> U+91da -> 0xfbba CJK Unified Ideograph | |
356 0xee9f -> U+91d7 -> 0xfbbb CJK Unified Ideograph | |
357 0xeea0 -> U+91de -> 0xfbbc CJK Unified Ideograph | |
358 0xeea1 -> U+91ed -> 0xfbbd CJK Unified Ideograph | |
359 0xeea2 -> U+91ee -> 0xfbbe CJK Unified Ideograph | |
360 0xeea3 -> U+91e4 -> 0xfbbf CJK Unified Ideograph | |
361 0xeea4 -> U+91e5 -> 0xfbc0 CJK Unified Ideograph | |
362 0xeea5 -> U+9206 -> 0xfbc1 CJK Unified Ideograph | |
363 0xeea6 -> U+9210 -> 0xfbc2 CJK Unified Ideograph | |
364 0xeea7 -> U+920a -> 0xfbc3 CJK Unified Ideograph | |
365 0xeea8 -> U+923a -> 0xfbc4 CJK Unified Ideograph | |
366 0xeea9 -> U+9240 -> 0xfbc5 CJK Unified Ideograph | |
367 0xeeaa -> U+923c -> 0xfbc6 CJK Unified Ideograph | |
368 0xeeab -> U+924e -> 0xfbc7 CJK Unified Ideograph | |
369 0xeeac -> U+9259 -> 0xfbc8 CJK Unified Ideograph | |
370 0xeead -> U+9251 -> 0xfbc9 CJK Unified Ideograph | |
371 0xeeae -> U+9239 -> 0xfbca CJK Unified Ideograph | |
372 0xeeaf -> U+9267 -> 0xfbcb CJK Unified Ideograph | |
373 0xeeb0 -> U+92a7 -> 0xfbcc CJK Unified Ideograph | |
374 0xeeb1 -> U+9277 -> 0xfbcd CJK Unified Ideograph | |
375 0xeeb2 -> U+9278 -> 0xfbce CJK Unified Ideograph | |
376 0xeeb3 -> U+92e7 -> 0xfbcf CJK Unified Ideograph | |
377 0xeeb4 -> U+92d7 -> 0xfbd0 CJK Unified Ideograph | |
378 0xeeb5 -> U+92d9 -> 0xfbd1 CJK Unified Ideograph | |
379 0xeeb6 -> U+92d0 -> 0xfbd2 CJK Unified Ideograph | |
380 0xeeb7 -> U+fa27 -> 0xfbd3 CJK compatibility Ideograph | |
381 0xeeb8 -> U+92d5 -> 0xfbd4 CJK Unified Ideograph | |
382 0xeeb9 -> U+92e0 -> 0xfbd5 CJK Unified Ideograph | |
383 0xeeba -> U+92d3 -> 0xfbd6 CJK Unified Ideograph | |
384 0xeebb -> U+9325 -> 0xfbd7 CJK Unified Ideograph | |
385 0xeebc -> U+9321 -> 0xfbd8 CJK Unified Ideograph | |
386 0xeebd -> U+92fb -> 0xfbd9 CJK Unified Ideograph | |
387 0xeebe -> U+fa28 -> 0xfbda CJK compatibility Ideograph | |
388 0xeebf -> U+931e -> 0xfbdb CJK Unified Ideograph | |
389 0xeec0 -> U+92ff -> 0xfbdc CJK Unified Ideograph | |
390 0xeec1 -> U+931d -> 0xfbdd CJK Unified Ideograph | |
391 0xeec2 -> U+9302 -> 0xfbde CJK Unified Ideograph | |
392 0xeec3 -> U+9370 -> 0xfbdf CJK Unified Ideograph | |
393 0xeec4 -> U+9357 -> 0xfbe0 CJK Unified Ideograph | |
394 0xeec5 -> U+93a4 -> 0xfbe1 CJK Unified Ideograph | |
395 0xeec6 -> U+93c6 -> 0xfbe2 CJK Unified Ideograph | |
396 0xeec7 -> U+93de -> 0xfbe3 CJK Unified Ideograph | |
397 0xeec8 -> U+93f8 -> 0xfbe4 CJK Unified Ideograph | |
398 0xeec9 -> U+9431 -> 0xfbe5 CJK Unified Ideograph | |
399 0xeeca -> U+9445 -> 0xfbe6 CJK Unified Ideograph | |
400 0xeecb -> U+9448 -> 0xfbe7 CJK Unified Ideograph | |
401 0xeecc -> U+9592 -> 0xfbe8 CJK Unified Ideograph | |
402 0xeecd -> U+f9dc -> 0xfbe9 CJK compatibility Ideograph | |
403 0xeece -> U+fa29 -> 0xfbea CJK compatibility Ideograph | |
404 0xeecf -> U+969d -> 0xfbeb CJK Unified Ideograph | |
405 0xeed0 -> U+96af -> 0xfbec CJK Unified Ideograph | |
406 0xeed1 -> U+9733 -> 0xfbed CJK Unified Ideograph | |
407 0xeed2 -> U+973b -> 0xfbee CJK Unified Ideograph | |
408 0xeed3 -> U+9743 -> 0xfbef CJK Unified Ideograph | |
409 0xeed4 -> U+974d -> 0xfbf0 CJK Unified Ideograph | |
410 0xeed5 -> U+974f -> 0xfbf1 CJK Unified Ideograph | |
411 0xeed6 -> U+9751 -> 0xfbf2 CJK Unified Ideograph | |
412 0xeed7 -> U+9755 -> 0xfbf3 CJK Unified Ideograph | |
413 0xeed8 -> U+9857 -> 0xfbf4 CJK Unified Ideograph | |
414 0xeed9 -> U+9865 -> 0xfbf5 CJK Unified Ideograph | |
415 0xeeda -> U+fa2a -> 0xfbf6 CJK compatibility Ideograph | |
416 0xeedb -> U+fa2b -> 0xfbf7 CJK compatibility Ideograph | |
417 0xeedc -> U+9927 -> 0xfbf8 CJK Unified Ideograph | |
418 0xeedd -> U+fa2c -> 0xfbf9 CJK compatibility Ideograph | |
419 0xeede -> U+999e -> 0xfbfa CJK Unified Ideograph | |
420 0xeedf -> U+9a4e -> 0xfbfb CJK Unified Ideograph | |
421 0xeee0 -> U+9ad9 -> 0xfbfc CJK Unified Ideograph | |
422 0xeee1 -> U+9adc -> 0xfc40 CJK Unified Ideograph | |
423 0xeee2 -> U+9b75 -> 0xfc41 CJK Unified Ideograph | |
424 0xeee3 -> U+9b72 -> 0xfc42 CJK Unified Ideograph | |
425 0xeee4 -> U+9b8f -> 0xfc43 CJK Unified Ideograph | |
426 0xeee5 -> U+9bb1 -> 0xfc44 CJK Unified Ideograph | |
427 0xeee6 -> U+9bbb -> 0xfc45 CJK Unified Ideograph | |
428 0xeee7 -> U+9c00 -> 0xfc46 CJK Unified Ideograph | |
429 0xeee8 -> U+9d70 -> 0xfc47 CJK Unified Ideograph | |
430 0xeee9 -> U+9d6b -> 0xfc48 CJK Unified Ideograph | |
431 0xeeea -> U+fa2d -> 0xfc49 CJK compatibility Ideograph | |
432 0xeeeb -> U+9e19 -> 0xfc4a CJK Unified Ideograph | |
433 0xeeec -> U+9ed1 -> 0xfc4b CJK Unified Ideograph | |
434 0xeeef -> U+2170 -> 0xfa40 Small Roman Numeral One | |
435 0xeef0 -> U+2171 -> 0xfa41 Small Roman Numeral Two | |
436 0xeef1 -> U+2172 -> 0xfa42 Small Roman Numeral Three | |
437 0xeef2 -> U+2173 -> 0xfa43 Small Roman Numeral Four | |
438 0xeef3 -> U+2174 -> 0xfa44 Small Roman Numeral Five | |
439 0xeef4 -> U+2175 -> 0xfa45 Small Roman Numeral Six | |
440 0xeef5 -> U+2176 -> 0xfa46 Small Roman Numeral Seven | |
441 0xeef6 -> U+2177 -> 0xfa47 Small Roman Numeral Eight | |
442 0xeef7 -> U+2178 -> 0xfa48 Small Roman Numeral Nine | |
443 0xeef8 -> U+2179 -> 0xfa49 Small Roman Numeral Ten | |
444 0xeef9 -> U+ffe2 -> 0x81ca Fullwidth Not Sign | |
445 0xeefa -> U+ffe4 -> 0xfa55 Fullwidth Broken Bar | |
446 0xeefb -> U+ff07 -> 0xfa56 Fullwidth Apostrophe | |
447 0xeefc -> U+ff02 -> 0xfa57 Fullwidth Quotation Mark | |
448 0xfa4a -> U+2160 -> 0x8754 Roman Numeral One | |
449 0xfa4b -> U+2161 -> 0x8755 Roman Numeral Two | |
450 0xfa4c -> U+2162 -> 0x8756 Roman Numeral Three | |
451 0xfa4d -> U+2163 -> 0x8757 Roman Numeral Four | |
452 0xfa4e -> U+2164 -> 0x8758 Roman Numeral Five | |
453 0xfa4f -> U+2165 -> 0x8759 Roman Numeral Six | |
454 0xfa50 -> U+2166 -> 0x875a Roman Numeral Seven | |
455 0xfa51 -> U+2167 -> 0x875b Roman Numeral Eight | |
456 0xfa52 -> U+2168 -> 0x875c Roman Numeral Nine | |
457 0xfa53 -> U+2169 -> 0x875d Roman Numeral Ten | |
458 0xfa54 -> U+ffe2 -> 0x81ca Fullwidth Not Sign | |
459 0xfa58 -> U+3231 -> 0x878a Parenthesized Ideograph Stock | |
460 0xfa59 -> U+2116 -> 0x8782 Numero Sign | |
461 0xfa5a -> U+2121 -> 0x8784 Telephone Sign | |
462 0xfa5b -> U+2235 -> 0x81e6 Because")) | |
463 (with-input-from-string (s kb170559) | |
464 (loop for line = (read-line s nil) until (null line) | |
465 do (let ((ucs (parse-integer (subseq line 14 18) :radix 16)) | |
466 (cp932 (parse-integer (subseq line 26 30) :radix 16))) | |
467 (setf (gethash ucs *ucs-to-cp932-hash*) cp932))))) | |
468 | |
469 (defun eucjp-to-ucs (code) | |
470 (values (gethash code *eucjp-to-ucs-hash*))) | |
471 | |
472 (defun ucs-to-eucjp (code) | |
473 (values (gethash code *ucs-to-eucjp-hash*))) | |
474 | |
475 (defun cp932-to-ucs (code) | |
476 (values (gethash code *cp932-to-ucs-hash*))) | |
477 | |
478 (defun ucs-to-cp932 (code) | |
479 (values (gethash code *ucs-to-cp932-hash*))) | |
480 | |
481 ;;;; EUC-JP | |
482 | |
483 (define-character-encoding :eucjp | |
484 "An 8-bit, variable-length character encoding in which | |
485 character code points in the range #x00-#x7f can be encoded in a | |
486 single octet; characters with larger code values can be encoded | |
487 in 2 to 3 bytes." | |
488 :max-units-per-char 3 | |
489 :literal-char-code-limit #x80) | |
490 | |
491 | |
492 (define-octet-counter :eucjp (getter type) | |
493 `(named-lambda eucjp-octet-counter (seq start end max) | |
494 (declare (type ,type seq) (fixnum start end max)) | |
495 (loop with noctets fixnum = 0 | |
496 for i fixnum from start below end | |
497 for code of-type code-point = (,getter seq i) | |
498 do (let* ((c (ucs-to-eucjp code)) | |
499 (new (+ (cond ((< #xffff c) 3) | |
500 ((< #xff c) 2) | |
501 (t 1)) | |
502 noctets))) | |
503 (if (and (plusp max) (> new max)) | |
504 (loop-finish) | |
505 (setq noctets new))) | |
506 finally (return (values noctets i))))) | |
507 | |
508 (define-code-point-counter :eucjp (getter type) | |
509 `(named-lambda eucjp-code-point-counter (seq start end max) | |
510 (declare (type ,type seq) (fixnum start end max)) | |
511 (loop with nchars fixnum = 0 | |
512 with i fixnum = start | |
513 while (< i end) do | |
514 (let* ((octet (,getter seq i)) | |
515 (next-i (+ i (cond ((= #x8f octet) 3) | |
516 ((or (< #xa0 octet #xff) | |
517 (= #x8e octet)) 2) | |
518 (t 1))))) | |
519 (declare (type ub8 octet) (fixnum next-i)) | |
520 (cond ((> next-i end) | |
521 ;; Should we add restarts to this error, we'll have | |
522 ;; to figure out a way to communicate with the | |
523 ;; decoder since we probably want to do something | |
524 ;; about it right here when we have a chance to | |
525 ;; change the count or something. (Like an | |
526 ;; alternative replacement character or perhaps the | |
527 ;; existence of this error so that the decoder | |
528 ;; doesn't have to check for it on every iteration | |
529 ;; like we do.) | |
530 ;; | |
531 ;; FIXME: The data for this error is not right. | |
532 (decoding-error (vector octet) :eucjp seq i | |
533 nil 'end-of-input-in-character) | |
534 (return (values (1+ nchars) end))) | |
535 (t | |
536 (setq nchars (1+ nchars) | |
537 i next-i) | |
538 (when (and (plusp max) (= nchars max)) | |
539 (return (values nchars i)))))) | |
540 finally (progn (assert (= i end)) | |
541 (return (values nchars i)))))) | |
542 | |
543 (define-encoder :eucjp (getter src-type setter dest-type) | |
544 `(named-lambda eucjp-encoder (src start end dest d-start) | |
545 (declare (type ,src-type src) | |
546 (type ,dest-type dest) | |
547 (fixnum start end d-start)) | |
548 (loop with di fixnum = d-start | |
549 for i fixnum from start below end | |
550 for code of-type code-point = (,getter src i) | |
551 for eucjp of-type code-point | |
552 = (ucs-to-eucjp code) do | |
553 (macrolet ((set-octet (offset value) | |
554 `(,',setter ,value dest (the fixnum (+ di ,o… | |
555 (cond | |
556 ;; 1 octet | |
557 ((< eucjp #x100) | |
558 (set-octet 0 eucjp) | |
559 (incf di)) | |
560 ;; 2 octets | |
561 ((< eucjp #x10000) | |
562 (set-octet 0 (f-logand #xff (f-ash eucjp -8))) | |
563 (set-octet 1 (logand eucjp #xff)) | |
564 (incf di 2)) | |
565 ;; 3 octets | |
566 (t | |
567 (set-octet 0 (f-logand #xff (f-ash eucjp -16))) | |
568 (set-octet 1 (f-logand #xff (f-ash eucjp -8))) | |
569 (set-octet 2 (logand eucjp #xff)) | |
570 (incf di 3)) | |
571 )) | |
572 finally (return (the fixnum (- di d-start)))))) | |
573 | |
574 | |
575 (define-decoder :eucjp (getter src-type setter dest-type) | |
576 `(named-lambda eucjp-decoder (src start end dest d-start) | |
577 (declare (type ,src-type src) | |
578 (type ,dest-type dest) | |
579 (fixnum start end d-start)) | |
580 (let ((u2 0)) | |
581 (declare (type ub8 u2)) | |
582 (loop for di fixnum from d-start | |
583 for i fixnum from start below end | |
584 for u1 of-type ub8 = (,getter src i) do | |
585 ;; Note: CONSUME-OCTET doesn't check if I is being | |
586 ;; incremented past END. We're assuming that END has | |
587 ;; been calculated with the CODE-POINT-POINTER above that | |
588 ;; checks this. | |
589 (macrolet | |
590 ((consume-octet () | |
591 `(let ((next-i (incf i))) | |
592 (if (= next-i end) | |
593 ;; FIXME: data for this error is incomplete. | |
594 ;; and signalling this error twice | |
595 (return-from setter-block | |
596 (decoding-error nil :eucjp src i +repl+ | |
597 'end-of-input-in-characte… | |
598 (,',getter src next-i)))) | |
599 (handle-error (n &optional (c 'character-decoding-er… | |
600 `(decoding-error | |
601 (vector ,@(subseq '(u1 u2) 0 n)) | |
602 :eucjp src (1+ (- i ,n)) +repl+ ',c)) | |
603 (handle-error-if-icb (var n) | |
604 `(when (not (< #x7f ,var #xc0)) | |
605 (decf i) | |
606 (return-from setter-block | |
607 (handle-error ,n invalid-utf8-continuation-by… | |
608 (,setter | |
609 (block setter-block | |
610 (cond | |
611 ;; 3 octets | |
612 ((= u1 #x8f) | |
613 (setq u2 (consume-octet)) | |
614 (eucjp-to-ucs (logior #x8f0000 | |
615 (f-ash u2 8) | |
616 (consume-octet)))) | |
617 ;; 2 octets | |
618 ((or (= u1 #x8e) | |
619 (< #xa0 u1 #xff)) | |
620 (eucjp-to-ucs (logior (f-ash u1 8) | |
621 (consume-octet)))) | |
622 ;; 1 octet | |
623 (t | |
624 (eucjp-to-ucs u1)))) | |
625 dest di)) | |
626 finally (return (the fixnum (- di d-start))))))) | |
627 | |
628 ;;;; CP932 | |
629 | |
630 (define-character-encoding :cp932 | |
631 "An 8-bit, variable-length character encoding in which | |
632 character code points in the range #x00-#x7f can be encoded in a | |
633 single octet; characters with larger code values can be encoded | |
634 in 2 bytes." | |
635 :max-units-per-char 2 | |
636 :literal-char-code-limit #x80) | |
637 | |
638 | |
639 (define-octet-counter :cp932 (getter type) | |
640 `(named-lambda cp932-octet-counter (seq start end max) | |
641 (declare (type ,type seq) (fixnum start end max)) | |
642 (loop with noctets fixnum = 0 | |
643 for i fixnum from start below end | |
644 for code of-type code-point = (,getter seq i) | |
645 do (let* ((c (ucs-to-cp932 code)) | |
646 (new (+ (cond ((< #xff c) 2) | |
647 (t 1)) | |
648 noctets))) | |
649 (if (and (plusp max) (> new max)) | |
650 (loop-finish) | |
651 (setq noctets new))) | |
652 finally (return (values noctets i))))) | |
653 | |
654 (define-code-point-counter :cp932 (getter type) | |
655 `(named-lambda cp932-code-point-counter (seq start end max) | |
656 (declare (type ,type seq) (fixnum start end max)) | |
657 (loop with nchars fixnum = 0 | |
658 with i fixnum = start | |
659 while (< i end) do | |
660 (let* ((octet (,getter seq i)) | |
661 (next-i (+ i (cond ((or (<= #x81 octet #x9f) | |
662 (<= #xe0 octet #xfc)) | |
663 2) | |
664 (t 1))))) | |
665 (declare (type ub8 octet) (fixnum next-i)) | |
666 (cond ((> next-i end) | |
667 ;; Should we add restarts to this error, we'll have | |
668 ;; to figure out a way to communicate with the | |
669 ;; decoder since we probably want to do something | |
670 ;; about it right here when we have a chance to | |
671 ;; change the count or something. (Like an | |
672 ;; alternative replacement character or perhaps the | |
673 ;; existence of this error so that the decoder | |
674 ;; doesn't have to check for it on every iteration | |
675 ;; like we do.) | |
676 ;; | |
677 ;; FIXME: The data for this error is not right. | |
678 (decoding-error (vector octet) :cp932 seq i | |
679 nil 'end-of-input-in-character) | |
680 (return (values (1+ nchars) end))) | |
681 (t | |
682 (setq nchars (1+ nchars) | |
683 i next-i) | |
684 (when (and (plusp max) (= nchars max)) | |
685 (return (values nchars i)))))) | |
686 finally (progn (assert (= i end)) | |
687 (return (values nchars i)))))) | |
688 | |
689 (define-encoder :cp932 (getter src-type setter dest-type) | |
690 `(named-lambda cp932-encoder (src start end dest d-start) | |
691 (declare (type ,src-type src) | |
692 (type ,dest-type dest) | |
693 (fixnum start end d-start)) | |
694 (loop with di fixnum = d-start | |
695 for i fixnum from start below end | |
696 for code of-type code-point = (,getter src i) | |
697 for cp932 of-type code-point | |
698 = (ucs-to-cp932 code) do | |
699 (macrolet ((set-octet (offset value) | |
700 `(,',setter ,value dest (the fixnum (+ di ,o… | |
701 (cond | |
702 ;; 1 octet | |
703 ((< cp932 #x100) | |
704 (set-octet 0 cp932) | |
705 (incf di)) | |
706 ;; 2 octets | |
707 ((< cp932 #x10000) | |
708 (set-octet 0 (f-logand #xff (f-ash cp932 -8))) | |
709 (set-octet 1 (logand cp932 #xff)) | |
710 (incf di 2)) | |
711 ;; 3 octets | |
712 (t | |
713 (set-octet 0 (f-logand #xff (f-ash cp932 -16))) | |
714 (set-octet 1 (f-logand #xff (f-ash cp932 -8))) | |
715 (set-octet 2 (logand cp932 #xff)) | |
716 (incf di 3)) | |
717 )) | |
718 finally (return (the fixnum (- di d-start)))))) | |
719 | |
720 | |
721 (define-decoder :cp932 (getter src-type setter dest-type) | |
722 `(named-lambda cp932-decoder (src start end dest d-start) | |
723 (declare (type ,src-type src) | |
724 (type ,dest-type dest) | |
725 (fixnum start end d-start)) | |
726 (let ((u2 0)) | |
727 (declare (type ub8 u2)) | |
728 (loop for di fixnum from d-start | |
729 for i fixnum from start below end | |
730 for u1 of-type ub8 = (,getter src i) do | |
731 ;; Note: CONSUME-OCTET doesn't check if I is being | |
732 ;; incremented past END. We're assuming that END has | |
733 ;; been calculated with the CODE-POINT-POINTER above that | |
734 ;; checks this. | |
735 (macrolet | |
736 ((consume-octet () | |
737 `(let ((next-i (incf i))) | |
738 (if (= next-i end) | |
739 ;; FIXME: data for this error is incomplete. | |
740 ;; and signalling this error twice | |
741 (return-from setter-block | |
742 (decoding-error nil :cp932 src i +repl+ | |
743 'end-of-input-in-characte… | |
744 (,',getter src next-i)))) | |
745 (handle-error (n &optional (c 'character-decoding-er… | |
746 `(decoding-error | |
747 (vector ,@(subseq '(u1 u2) 0 n)) | |
748 :cp932 src (1+ (- i ,n)) +repl+ ',c)) | |
749 (handle-error-if-icb (var n) | |
750 `(when (not (< #x7f ,var #xc0)) | |
751 (decf i) | |
752 (return-from setter-block | |
753 (handle-error ,n invalid-utf8-continuation-by… | |
754 (,setter | |
755 (block setter-block | |
756 (cond | |
757 ;; 2 octets | |
758 ((or (<= #x81 u1 #x9f) | |
759 (<= #xe0 u1 #xfc)) | |
760 (setq u2 (consume-octet)) | |
761 (cp932-to-ucs (logior (f-ash u1 8) | |
762 u2))) | |
763 ;; 1 octet | |
764 (t | |
765 (cp932-to-ucs u1)))) | |
766 dest di)) | |
767 finally (return (the fixnum (- di d-start))))))) |