| tenc-jpn.lisp - clic - Clic is an command line interactive client for gopher wr… | |
| git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/ | |
| Log | |
| Files | |
| Refs | |
| Tags | |
| LICENSE | |
| --- | |
| tenc-jpn.lisp (37967B) | |
| --- | |
| 1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*- | |
| 2 ;;; | |
| 3 ;;; enc-jpn.lisp --- Japanese encodings. | |
| 4 ;;; | |
| 5 | |
| 6 (in-package #:babel-encodings) | |
| 7 | |
| 8 ;;;; helper functions | |
| 9 (defvar *eucjp-to-ucs-hash* (make-hash-table)) | |
| 10 (defvar *ucs-to-eucjp-hash* (make-hash-table)) | |
| 11 (defvar *cp932-to-ucs-hash* (make-hash-table)) | |
| 12 (defvar *ucs-to-cp932-hash* (make-hash-table)) | |
| 13 | |
| 14 (dolist (i `((,*cp932-only* | |
| 15 ,*cp932-to-ucs-hash* | |
| 16 ,*ucs-to-cp932-hash*) | |
| 17 (,*eucjp-only* | |
| 18 ,*eucjp-to-ucs-hash* | |
| 19 ,*ucs-to-eucjp-hash*) | |
| 20 (,*eucjp* | |
| 21 ,*eucjp-to-ucs-hash* | |
| 22 ,*ucs-to-eucjp-hash*))) | |
| 23 (dolist (j (first i)) | |
| 24 (setf (gethash (car j) (second i)) (cadr j)) | |
| 25 (setf (gethash (cadr j) (third i)) (car j)))) | |
| 26 | |
| 27 (flet ((euc-cp932 (x) | |
| 28 (let ((high (ash x -16)) | |
| 29 (mid (logand (ash x -8) 255)) | |
| 30 (low (logand x 255))) | |
| 31 (cond ((not (zerop high)) | |
| 32 nil) | |
| 33 ((= mid #x8e) | |
| 34 (logand x 255)) | |
| 35 ((zerop mid) | |
| 36 x) | |
| 37 ((decf mid #xa1) | |
| 38 (decf low #x80) | |
| 39 (incf low (if (zerop (logand mid 1)) #x1f #x7e)) | |
| 40 (incf low (if (<= #x7f low #x9d) 1 0)) | |
| 41 (setq mid (ash mid -1)) | |
| 42 (incf mid (if (<= mid #x1e) #x81 #xc1)) | |
| 43 (+ (ash mid 8) low)))))) | |
| 44 (dolist (i *eucjp*) | |
| 45 (let ((cp932 (euc-cp932 (first i)))) | |
| 46 (when cp932 | |
| 47 (setf (gethash cp932 *cp932-to-ucs-hash*) (second i)) | |
| 48 (setf (gethash (second i) *ucs-to-cp932-hash*) cp932))))) | |
| 49 | |
| 50 ;ascii | |
| 51 (loop for i from #x00 to #x7f do | |
| 52 (setf (gethash i *cp932-to-ucs-hash*) i) | |
| 53 (setf (gethash i *eucjp-to-ucs-hash*) i) | |
| 54 (setf (gethash i *ucs-to-eucjp-hash*) i) | |
| 55 (setf (gethash i *ucs-to-cp932-hash*) i)) | |
| 56 | |
| 57 ;half-width katakana | |
| 58 (loop for i from #xa1 to #xdf do | |
| 59 (setf (gethash i *cp932-to-ucs-hash*) (+ #xff61 #x-a1 i)) | |
| 60 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-cp932-hash*) i) | |
| 61 (setf (gethash (+ #x8e00 i) *eucjp-to-ucs-hash*) (+ #xff61 #x-a1 i… | |
| 62 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-eucjp-hash*) (+ #x8e00 i… | |
| 63 | |
| 64 ;; This is quoted from https://support.microsoft.com/en-us/kb/170559/en-… | |
| 65 (let ((kb170559 "0x8790 -> U+2252 -> 0x81e0 Approximately Equal To… | |
| 66 0x8791 -> U+2261 -> 0x81df Identical To | |
| 67 0x8792 -> U+222b -> 0x81e7 Integral | |
| 68 0x8795 -> U+221a -> 0x81e3 Square Root | |
| 69 0x8796 -> U+22a5 -> 0x81db Up Tack | |
| 70 0x8797 -> U+2220 -> 0x81da Angle | |
| 71 0x879a -> U+2235 -> 0x81e6 Because | |
| 72 0x879b -> U+2229 -> 0x81bf Intersection | |
| 73 0x879c -> U+222a -> 0x81be Union | |
| 74 0xed40 -> U+7e8a -> 0xfa5c CJK Unified Ideograph | |
| 75 0xed41 -> U+891c -> 0xfa5d CJK Unified Ideograph | |
| 76 0xed42 -> U+9348 -> 0xfa5e CJK Unified Ideograph | |
| 77 0xed43 -> U+9288 -> 0xfa5f CJK Unified Ideograph | |
| 78 0xed44 -> U+84dc -> 0xfa60 CJK Unified Ideograph | |
| 79 0xed45 -> U+4fc9 -> 0xfa61 CJK Unified Ideograph | |
| 80 0xed46 -> U+70bb -> 0xfa62 CJK Unified Ideograph | |
| 81 0xed47 -> U+6631 -> 0xfa63 CJK Unified Ideograph | |
| 82 0xed48 -> U+68c8 -> 0xfa64 CJK Unified Ideograph | |
| 83 0xed49 -> U+92f9 -> 0xfa65 CJK Unified Ideograph | |
| 84 0xed4a -> U+66fb -> 0xfa66 CJK Unified Ideograph | |
| 85 0xed4b -> U+5f45 -> 0xfa67 CJK Unified Ideograph | |
| 86 0xed4c -> U+4e28 -> 0xfa68 CJK Unified Ideograph | |
| 87 0xed4d -> U+4ee1 -> 0xfa69 CJK Unified Ideograph | |
| 88 0xed4e -> U+4efc -> 0xfa6a CJK Unified Ideograph | |
| 89 0xed4f -> U+4f00 -> 0xfa6b CJK Unified Ideograph | |
| 90 0xed50 -> U+4f03 -> 0xfa6c CJK Unified Ideograph | |
| 91 0xed51 -> U+4f39 -> 0xfa6d CJK Unified Ideograph | |
| 92 0xed52 -> U+4f56 -> 0xfa6e CJK Unified Ideograph | |
| 93 0xed53 -> U+4f92 -> 0xfa6f CJK Unified Ideograph | |
| 94 0xed54 -> U+4f8a -> 0xfa70 CJK Unified Ideograph | |
| 95 0xed55 -> U+4f9a -> 0xfa71 CJK Unified Ideograph | |
| 96 0xed56 -> U+4f94 -> 0xfa72 CJK Unified Ideograph | |
| 97 0xed57 -> U+4fcd -> 0xfa73 CJK Unified Ideograph | |
| 98 0xed58 -> U+5040 -> 0xfa74 CJK Unified Ideograph | |
| 99 0xed59 -> U+5022 -> 0xfa75 CJK Unified Ideograph | |
| 100 0xed5a -> U+4fff -> 0xfa76 CJK Unified Ideograph | |
| 101 0xed5b -> U+501e -> 0xfa77 CJK Unified Ideograph | |
| 102 0xed5c -> U+5046 -> 0xfa78 CJK Unified Ideograph | |
| 103 0xed5d -> U+5070 -> 0xfa79 CJK Unified Ideograph | |
| 104 0xed5e -> U+5042 -> 0xfa7a CJK Unified Ideograph | |
| 105 0xed5f -> U+5094 -> 0xfa7b CJK Unified Ideograph | |
| 106 0xed60 -> U+50f4 -> 0xfa7c CJK Unified Ideograph | |
| 107 0xed61 -> U+50d8 -> 0xfa7d CJK Unified Ideograph | |
| 108 0xed62 -> U+514a -> 0xfa7e CJK Unified Ideograph | |
| 109 0xed63 -> U+5164 -> 0xfa80 CJK Unified Ideograph | |
| 110 0xed64 -> U+519d -> 0xfa81 CJK Unified Ideograph | |
| 111 0xed65 -> U+51be -> 0xfa82 CJK Unified Ideograph | |
| 112 0xed66 -> U+51ec -> 0xfa83 CJK Unified Ideograph | |
| 113 0xed67 -> U+5215 -> 0xfa84 CJK Unified Ideograph | |
| 114 0xed68 -> U+529c -> 0xfa85 CJK Unified Ideograph | |
| 115 0xed69 -> U+52a6 -> 0xfa86 CJK Unified Ideograph | |
| 116 0xed6a -> U+52c0 -> 0xfa87 CJK Unified Ideograph | |
| 117 0xed6b -> U+52db -> 0xfa88 CJK Unified Ideograph | |
| 118 0xed6c -> U+5300 -> 0xfa89 CJK Unified Ideograph | |
| 119 0xed6d -> U+5307 -> 0xfa8a CJK Unified Ideograph | |
| 120 0xed6e -> U+5324 -> 0xfa8b CJK Unified Ideograph | |
| 121 0xed6f -> U+5372 -> 0xfa8c CJK Unified Ideograph | |
| 122 0xed70 -> U+5393 -> 0xfa8d CJK Unified Ideograph | |
| 123 0xed71 -> U+53b2 -> 0xfa8e CJK Unified Ideograph | |
| 124 0xed72 -> U+53dd -> 0xfa8f CJK Unified Ideograph | |
| 125 0xed73 -> U+fa0e -> 0xfa90 CJK compatibility Ideograph | |
| 126 0xed74 -> U+549c -> 0xfa91 CJK Unified Ideograph | |
| 127 0xed75 -> U+548a -> 0xfa92 CJK Unified Ideograph | |
| 128 0xed76 -> U+54a9 -> 0xfa93 CJK Unified Ideograph | |
| 129 0xed77 -> U+54ff -> 0xfa94 CJK Unified Ideograph | |
| 130 0xed78 -> U+5586 -> 0xfa95 CJK Unified Ideograph | |
| 131 0xed79 -> U+5759 -> 0xfa96 CJK Unified Ideograph | |
| 132 0xed7a -> U+5765 -> 0xfa97 CJK Unified Ideograph | |
| 133 0xed7b -> U+57ac -> 0xfa98 CJK Unified Ideograph | |
| 134 0xed7c -> U+57c8 -> 0xfa99 CJK Unified Ideograph | |
| 135 0xed7d -> U+57c7 -> 0xfa9a CJK Unified Ideograph | |
| 136 0xed7e -> U+fa0f -> 0xfa9b CJK compatibility Ideograph | |
| 137 0xed80 -> U+fa10 -> 0xfa9c CJK compatibility Ideograph | |
| 138 0xed81 -> U+589e -> 0xfa9d CJK Unified Ideograph | |
| 139 0xed82 -> U+58b2 -> 0xfa9e CJK Unified Ideograph | |
| 140 0xed83 -> U+590b -> 0xfa9f CJK Unified Ideograph | |
| 141 0xed84 -> U+5953 -> 0xfaa0 CJK Unified Ideograph | |
| 142 0xed85 -> U+595b -> 0xfaa1 CJK Unified Ideograph | |
| 143 0xed86 -> U+595d -> 0xfaa2 CJK Unified Ideograph | |
| 144 0xed87 -> U+5963 -> 0xfaa3 CJK Unified Ideograph | |
| 145 0xed88 -> U+59a4 -> 0xfaa4 CJK Unified Ideograph | |
| 146 0xed89 -> U+59ba -> 0xfaa5 CJK Unified Ideograph | |
| 147 0xed8a -> U+5b56 -> 0xfaa6 CJK Unified Ideograph | |
| 148 0xed8b -> U+5bc0 -> 0xfaa7 CJK Unified Ideograph | |
| 149 0xed8c -> U+752f -> 0xfaa8 CJK Unified Ideograph | |
| 150 0xed8d -> U+5bd8 -> 0xfaa9 CJK Unified Ideograph | |
| 151 0xed8e -> U+5bec -> 0xfaaa CJK Unified Ideograph | |
| 152 0xed8f -> U+5c1e -> 0xfaab CJK Unified Ideograph | |
| 153 0xed90 -> U+5ca6 -> 0xfaac CJK Unified Ideograph | |
| 154 0xed91 -> U+5cba -> 0xfaad CJK Unified Ideograph | |
| 155 0xed92 -> U+5cf5 -> 0xfaae CJK Unified Ideograph | |
| 156 0xed93 -> U+5d27 -> 0xfaaf CJK Unified Ideograph | |
| 157 0xed94 -> U+5d53 -> 0xfab0 CJK Unified Ideograph | |
| 158 0xed95 -> U+fa11 -> 0xfab1 CJK compatibility Ideograph | |
| 159 0xed96 -> U+5d42 -> 0xfab2 CJK Unified Ideograph | |
| 160 0xed97 -> U+5d6d -> 0xfab3 CJK Unified Ideograph | |
| 161 0xed98 -> U+5db8 -> 0xfab4 CJK Unified Ideograph | |
| 162 0xed99 -> U+5db9 -> 0xfab5 CJK Unified Ideograph | |
| 163 0xed9a -> U+5dd0 -> 0xfab6 CJK Unified Ideograph | |
| 164 0xed9b -> U+5f21 -> 0xfab7 CJK Unified Ideograph | |
| 165 0xed9c -> U+5f34 -> 0xfab8 CJK Unified Ideograph | |
| 166 0xed9d -> U+5f67 -> 0xfab9 CJK Unified Ideograph | |
| 167 0xed9e -> U+5fb7 -> 0xfaba CJK Unified Ideograph | |
| 168 0xed9f -> U+5fde -> 0xfabb CJK Unified Ideograph | |
| 169 0xeda0 -> U+605d -> 0xfabc CJK Unified Ideograph | |
| 170 0xeda1 -> U+6085 -> 0xfabd CJK Unified Ideograph | |
| 171 0xeda2 -> U+608a -> 0xfabe CJK Unified Ideograph | |
| 172 0xeda3 -> U+60de -> 0xfabf CJK Unified Ideograph | |
| 173 0xeda4 -> U+60d5 -> 0xfac0 CJK Unified Ideograph | |
| 174 0xeda5 -> U+6120 -> 0xfac1 CJK Unified Ideograph | |
| 175 0xeda6 -> U+60f2 -> 0xfac2 CJK Unified Ideograph | |
| 176 0xeda7 -> U+6111 -> 0xfac3 CJK Unified Ideograph | |
| 177 0xeda8 -> U+6137 -> 0xfac4 CJK Unified Ideograph | |
| 178 0xeda9 -> U+6130 -> 0xfac5 CJK Unified Ideograph | |
| 179 0xedaa -> U+6198 -> 0xfac6 CJK Unified Ideograph | |
| 180 0xedab -> U+6213 -> 0xfac7 CJK Unified Ideograph | |
| 181 0xedac -> U+62a6 -> 0xfac8 CJK Unified Ideograph | |
| 182 0xedad -> U+63f5 -> 0xfac9 CJK Unified Ideograph | |
| 183 0xedae -> U+6460 -> 0xfaca CJK Unified Ideograph | |
| 184 0xedaf -> U+649d -> 0xfacb CJK Unified Ideograph | |
| 185 0xedb0 -> U+64ce -> 0xfacc CJK Unified Ideograph | |
| 186 0xedb1 -> U+654e -> 0xfacd CJK Unified Ideograph | |
| 187 0xedb2 -> U+6600 -> 0xface CJK Unified Ideograph | |
| 188 0xedb3 -> U+6615 -> 0xfacf CJK Unified Ideograph | |
| 189 0xedb4 -> U+663b -> 0xfad0 CJK Unified Ideograph | |
| 190 0xedb5 -> U+6609 -> 0xfad1 CJK Unified Ideograph | |
| 191 0xedb6 -> U+662e -> 0xfad2 CJK Unified Ideograph | |
| 192 0xedb7 -> U+661e -> 0xfad3 CJK Unified Ideograph | |
| 193 0xedb8 -> U+6624 -> 0xfad4 CJK Unified Ideograph | |
| 194 0xedb9 -> U+6665 -> 0xfad5 CJK Unified Ideograph | |
| 195 0xedba -> U+6657 -> 0xfad6 CJK Unified Ideograph | |
| 196 0xedbb -> U+6659 -> 0xfad7 CJK Unified Ideograph | |
| 197 0xedbc -> U+fa12 -> 0xfad8 CJK compatibility Ideograph | |
| 198 0xedbd -> U+6673 -> 0xfad9 CJK Unified Ideograph | |
| 199 0xedbe -> U+6699 -> 0xfada CJK Unified Ideograph | |
| 200 0xedbf -> U+66a0 -> 0xfadb CJK Unified Ideograph | |
| 201 0xedc0 -> U+66b2 -> 0xfadc CJK Unified Ideograph | |
| 202 0xedc1 -> U+66bf -> 0xfadd CJK Unified Ideograph | |
| 203 0xedc2 -> U+66fa -> 0xfade CJK Unified Ideograph | |
| 204 0xedc3 -> U+670e -> 0xfadf CJK Unified Ideograph | |
| 205 0xedc4 -> U+f929 -> 0xfae0 CJK compatibility Ideograph | |
| 206 0xedc5 -> U+6766 -> 0xfae1 CJK Unified Ideograph | |
| 207 0xedc6 -> U+67bb -> 0xfae2 CJK Unified Ideograph | |
| 208 0xedc7 -> U+6852 -> 0xfae3 CJK Unified Ideograph | |
| 209 0xedc8 -> U+67c0 -> 0xfae4 CJK Unified Ideograph | |
| 210 0xedc9 -> U+6801 -> 0xfae5 CJK Unified Ideograph | |
| 211 0xedca -> U+6844 -> 0xfae6 CJK Unified Ideograph | |
| 212 0xedcb -> U+68cf -> 0xfae7 CJK Unified Ideograph | |
| 213 0xedcc -> U+fa13 -> 0xfae8 CJK compatibility Ideograph | |
| 214 0xedcd -> U+6968 -> 0xfae9 CJK Unified Ideograph | |
| 215 0xedce -> U+fa14 -> 0xfaea CJK compatibility Ideograph | |
| 216 0xedcf -> U+6998 -> 0xfaeb CJK Unified Ideograph | |
| 217 0xedd0 -> U+69e2 -> 0xfaec CJK Unified Ideograph | |
| 218 0xedd1 -> U+6a30 -> 0xfaed CJK Unified Ideograph | |
| 219 0xedd2 -> U+6a6b -> 0xfaee CJK Unified Ideograph | |
| 220 0xedd3 -> U+6a46 -> 0xfaef CJK Unified Ideograph | |
| 221 0xedd4 -> U+6a73 -> 0xfaf0 CJK Unified Ideograph | |
| 222 0xedd5 -> U+6a7e -> 0xfaf1 CJK Unified Ideograph | |
| 223 0xedd6 -> U+6ae2 -> 0xfaf2 CJK Unified Ideograph | |
| 224 0xedd7 -> U+6ae4 -> 0xfaf3 CJK Unified Ideograph | |
| 225 0xedd8 -> U+6bd6 -> 0xfaf4 CJK Unified Ideograph | |
| 226 0xedd9 -> U+6c3f -> 0xfaf5 CJK Unified Ideograph | |
| 227 0xedda -> U+6c5c -> 0xfaf6 CJK Unified Ideograph | |
| 228 0xeddb -> U+6c86 -> 0xfaf7 CJK Unified Ideograph | |
| 229 0xeddc -> U+6c6f -> 0xfaf8 CJK Unified Ideograph | |
| 230 0xeddd -> U+6cda -> 0xfaf9 CJK Unified Ideograph | |
| 231 0xedde -> U+6d04 -> 0xfafa CJK Unified Ideograph | |
| 232 0xeddf -> U+6d87 -> 0xfafb CJK Unified Ideograph | |
| 233 0xede0 -> U+6d6f -> 0xfafc CJK Unified Ideograph | |
| 234 0xede1 -> U+6d96 -> 0xfb40 CJK Unified Ideograph | |
| 235 0xede2 -> U+6dac -> 0xfb41 CJK Unified Ideograph | |
| 236 0xede3 -> U+6dcf -> 0xfb42 CJK Unified Ideograph | |
| 237 0xede4 -> U+6df8 -> 0xfb43 CJK Unified Ideograph | |
| 238 0xede5 -> U+6df2 -> 0xfb44 CJK Unified Ideograph | |
| 239 0xede6 -> U+6dfc -> 0xfb45 CJK Unified Ideograph | |
| 240 0xede7 -> U+6e39 -> 0xfb46 CJK Unified Ideograph | |
| 241 0xede8 -> U+6e5c -> 0xfb47 CJK Unified Ideograph | |
| 242 0xede9 -> U+6e27 -> 0xfb48 CJK Unified Ideograph | |
| 243 0xedea -> U+6e3c -> 0xfb49 CJK Unified Ideograph | |
| 244 0xedeb -> U+6ebf -> 0xfb4a CJK Unified Ideograph | |
| 245 0xedec -> U+6f88 -> 0xfb4b CJK Unified Ideograph | |
| 246 0xeded -> U+6fb5 -> 0xfb4c CJK Unified Ideograph | |
| 247 0xedee -> U+6ff5 -> 0xfb4d CJK Unified Ideograph | |
| 248 0xedef -> U+7005 -> 0xfb4e CJK Unified Ideograph | |
| 249 0xedf0 -> U+7007 -> 0xfb4f CJK Unified Ideograph | |
| 250 0xedf1 -> U+7028 -> 0xfb50 CJK Unified Ideograph | |
| 251 0xedf2 -> U+7085 -> 0xfb51 CJK Unified Ideograph | |
| 252 0xedf3 -> U+70ab -> 0xfb52 CJK Unified Ideograph | |
| 253 0xedf4 -> U+710f -> 0xfb53 CJK Unified Ideograph | |
| 254 0xedf5 -> U+7104 -> 0xfb54 CJK Unified Ideograph | |
| 255 0xedf6 -> U+715c -> 0xfb55 CJK Unified Ideograph | |
| 256 0xedf7 -> U+7146 -> 0xfb56 CJK Unified Ideograph | |
| 257 0xedf8 -> U+7147 -> 0xfb57 CJK Unified Ideograph | |
| 258 0xedf9 -> U+fa15 -> 0xfb58 CJK compatibility Ideograph | |
| 259 0xedfa -> U+71c1 -> 0xfb59 CJK Unified Ideograph | |
| 260 0xedfb -> U+71fe -> 0xfb5a CJK Unified Ideograph | |
| 261 0xedfc -> U+72b1 -> 0xfb5b CJK Unified Ideograph | |
| 262 0xee40 -> U+72be -> 0xfb5c CJK Unified Ideograph | |
| 263 0xee41 -> U+7324 -> 0xfb5d CJK Unified Ideograph | |
| 264 0xee42 -> U+fa16 -> 0xfb5e CJK compatibility Ideograph | |
| 265 0xee43 -> U+7377 -> 0xfb5f CJK Unified Ideograph | |
| 266 0xee44 -> U+73bd -> 0xfb60 CJK Unified Ideograph | |
| 267 0xee45 -> U+73c9 -> 0xfb61 CJK Unified Ideograph | |
| 268 0xee46 -> U+73d6 -> 0xfb62 CJK Unified Ideograph | |
| 269 0xee47 -> U+73e3 -> 0xfb63 CJK Unified Ideograph | |
| 270 0xee48 -> U+73d2 -> 0xfb64 CJK Unified Ideograph | |
| 271 0xee49 -> U+7407 -> 0xfb65 CJK Unified Ideograph | |
| 272 0xee4a -> U+73f5 -> 0xfb66 CJK Unified Ideograph | |
| 273 0xee4b -> U+7426 -> 0xfb67 CJK Unified Ideograph | |
| 274 0xee4c -> U+742a -> 0xfb68 CJK Unified Ideograph | |
| 275 0xee4d -> U+7429 -> 0xfb69 CJK Unified Ideograph | |
| 276 0xee4e -> U+742e -> 0xfb6a CJK Unified Ideograph | |
| 277 0xee4f -> U+7462 -> 0xfb6b CJK Unified Ideograph | |
| 278 0xee50 -> U+7489 -> 0xfb6c CJK Unified Ideograph | |
| 279 0xee51 -> U+749f -> 0xfb6d CJK Unified Ideograph | |
| 280 0xee52 -> U+7501 -> 0xfb6e CJK Unified Ideograph | |
| 281 0xee53 -> U+756f -> 0xfb6f CJK Unified Ideograph | |
| 282 0xee54 -> U+7682 -> 0xfb70 CJK Unified Ideograph | |
| 283 0xee55 -> U+769c -> 0xfb71 CJK Unified Ideograph | |
| 284 0xee56 -> U+769e -> 0xfb72 CJK Unified Ideograph | |
| 285 0xee57 -> U+769b -> 0xfb73 CJK Unified Ideograph | |
| 286 0xee58 -> U+76a6 -> 0xfb74 CJK Unified Ideograph | |
| 287 0xee59 -> U+fa17 -> 0xfb75 CJK compatibility Ideograph | |
| 288 0xee5a -> U+7746 -> 0xfb76 CJK Unified Ideograph | |
| 289 0xee5b -> U+52af -> 0xfb77 CJK Unified Ideograph | |
| 290 0xee5c -> U+7821 -> 0xfb78 CJK Unified Ideograph | |
| 291 0xee5d -> U+784e -> 0xfb79 CJK Unified Ideograph | |
| 292 0xee5e -> U+7864 -> 0xfb7a CJK Unified Ideograph | |
| 293 0xee5f -> U+787a -> 0xfb7b CJK Unified Ideograph | |
| 294 0xee60 -> U+7930 -> 0xfb7c CJK Unified Ideograph | |
| 295 0xee61 -> U+fa18 -> 0xfb7d CJK compatibility Ideograph | |
| 296 0xee62 -> U+fa19 -> 0xfb7e CJK compatibility Ideograph | |
| 297 0xee63 -> U+fa1a -> 0xfb80 CJK compatibility Ideograph | |
| 298 0xee64 -> U+7994 -> 0xfb81 CJK Unified Ideograph | |
| 299 0xee65 -> U+fa1b -> 0xfb82 CJK compatibility Ideograph | |
| 300 0xee66 -> U+799b -> 0xfb83 CJK Unified Ideograph | |
| 301 0xee67 -> U+7ad1 -> 0xfb84 CJK Unified Ideograph | |
| 302 0xee68 -> U+7ae7 -> 0xfb85 CJK Unified Ideograph | |
| 303 0xee69 -> U+fa1c -> 0xfb86 CJK compatibility Ideograph | |
| 304 0xee6a -> U+7aeb -> 0xfb87 CJK Unified Ideograph | |
| 305 0xee6b -> U+7b9e -> 0xfb88 CJK Unified Ideograph | |
| 306 0xee6c -> U+fa1d -> 0xfb89 CJK compatibility Ideograph | |
| 307 0xee6d -> U+7d48 -> 0xfb8a CJK Unified Ideograph | |
| 308 0xee6e -> U+7d5c -> 0xfb8b CJK Unified Ideograph | |
| 309 0xee6f -> U+7db7 -> 0xfb8c CJK Unified Ideograph | |
| 310 0xee70 -> U+7da0 -> 0xfb8d CJK Unified Ideograph | |
| 311 0xee71 -> U+7dd6 -> 0xfb8e CJK Unified Ideograph | |
| 312 0xee72 -> U+7e52 -> 0xfb8f CJK Unified Ideograph | |
| 313 0xee73 -> U+7f47 -> 0xfb90 CJK Unified Ideograph | |
| 314 0xee74 -> U+7fa1 -> 0xfb91 CJK Unified Ideograph | |
| 315 0xee75 -> U+fa1e -> 0xfb92 CJK compatibility Ideograph | |
| 316 0xee76 -> U+8301 -> 0xfb93 CJK Unified Ideograph | |
| 317 0xee77 -> U+8362 -> 0xfb94 CJK Unified Ideograph | |
| 318 0xee78 -> U+837f -> 0xfb95 CJK Unified Ideograph | |
| 319 0xee79 -> U+83c7 -> 0xfb96 CJK Unified Ideograph | |
| 320 0xee7a -> U+83f6 -> 0xfb97 CJK Unified Ideograph | |
| 321 0xee7b -> U+8448 -> 0xfb98 CJK Unified Ideograph | |
| 322 0xee7c -> U+84b4 -> 0xfb99 CJK Unified Ideograph | |
| 323 0xee7d -> U+8553 -> 0xfb9a CJK Unified Ideograph | |
| 324 0xee7e -> U+8559 -> 0xfb9b CJK Unified Ideograph | |
| 325 0xee80 -> U+856b -> 0xfb9c CJK Unified Ideograph | |
| 326 0xee81 -> U+fa1f -> 0xfb9d CJK compatibility Ideograph | |
| 327 0xee82 -> U+85b0 -> 0xfb9e CJK Unified Ideograph | |
| 328 0xee83 -> U+fa20 -> 0xfb9f CJK compatibility Ideograph | |
| 329 0xee84 -> U+fa21 -> 0xfba0 CJK compatibility Ideograph | |
| 330 0xee85 -> U+8807 -> 0xfba1 CJK Unified Ideograph | |
| 331 0xee86 -> U+88f5 -> 0xfba2 CJK Unified Ideograph | |
| 332 0xee87 -> U+8a12 -> 0xfba3 CJK Unified Ideograph | |
| 333 0xee88 -> U+8a37 -> 0xfba4 CJK Unified Ideograph | |
| 334 0xee89 -> U+8a79 -> 0xfba5 CJK Unified Ideograph | |
| 335 0xee8a -> U+8aa7 -> 0xfba6 CJK Unified Ideograph | |
| 336 0xee8b -> U+8abe -> 0xfba7 CJK Unified Ideograph | |
| 337 0xee8c -> U+8adf -> 0xfba8 CJK Unified Ideograph | |
| 338 0xee8d -> U+fa22 -> 0xfba9 CJK compatibility Ideograph | |
| 339 0xee8e -> U+8af6 -> 0xfbaa CJK Unified Ideograph | |
| 340 0xee8f -> U+8b53 -> 0xfbab CJK Unified Ideograph | |
| 341 0xee90 -> U+8b7f -> 0xfbac CJK Unified Ideograph | |
| 342 0xee91 -> U+8cf0 -> 0xfbad CJK Unified Ideograph | |
| 343 0xee92 -> U+8cf4 -> 0xfbae CJK Unified Ideograph | |
| 344 0xee93 -> U+8d12 -> 0xfbaf CJK Unified Ideograph | |
| 345 0xee94 -> U+8d76 -> 0xfbb0 CJK Unified Ideograph | |
| 346 0xee95 -> U+fa23 -> 0xfbb1 CJK compatibility Ideograph | |
| 347 0xee96 -> U+8ecf -> 0xfbb2 CJK Unified Ideograph | |
| 348 0xee97 -> U+fa24 -> 0xfbb3 CJK compatibility Ideograph | |
| 349 0xee98 -> U+fa25 -> 0xfbb4 CJK compatibility Ideograph | |
| 350 0xee99 -> U+9067 -> 0xfbb5 CJK Unified Ideograph | |
| 351 0xee9a -> U+90de -> 0xfbb6 CJK Unified Ideograph | |
| 352 0xee9b -> U+fa26 -> 0xfbb7 CJK compatibility Ideograph | |
| 353 0xee9c -> U+9115 -> 0xfbb8 CJK Unified Ideograph | |
| 354 0xee9d -> U+9127 -> 0xfbb9 CJK Unified Ideograph | |
| 355 0xee9e -> U+91da -> 0xfbba CJK Unified Ideograph | |
| 356 0xee9f -> U+91d7 -> 0xfbbb CJK Unified Ideograph | |
| 357 0xeea0 -> U+91de -> 0xfbbc CJK Unified Ideograph | |
| 358 0xeea1 -> U+91ed -> 0xfbbd CJK Unified Ideograph | |
| 359 0xeea2 -> U+91ee -> 0xfbbe CJK Unified Ideograph | |
| 360 0xeea3 -> U+91e4 -> 0xfbbf CJK Unified Ideograph | |
| 361 0xeea4 -> U+91e5 -> 0xfbc0 CJK Unified Ideograph | |
| 362 0xeea5 -> U+9206 -> 0xfbc1 CJK Unified Ideograph | |
| 363 0xeea6 -> U+9210 -> 0xfbc2 CJK Unified Ideograph | |
| 364 0xeea7 -> U+920a -> 0xfbc3 CJK Unified Ideograph | |
| 365 0xeea8 -> U+923a -> 0xfbc4 CJK Unified Ideograph | |
| 366 0xeea9 -> U+9240 -> 0xfbc5 CJK Unified Ideograph | |
| 367 0xeeaa -> U+923c -> 0xfbc6 CJK Unified Ideograph | |
| 368 0xeeab -> U+924e -> 0xfbc7 CJK Unified Ideograph | |
| 369 0xeeac -> U+9259 -> 0xfbc8 CJK Unified Ideograph | |
| 370 0xeead -> U+9251 -> 0xfbc9 CJK Unified Ideograph | |
| 371 0xeeae -> U+9239 -> 0xfbca CJK Unified Ideograph | |
| 372 0xeeaf -> U+9267 -> 0xfbcb CJK Unified Ideograph | |
| 373 0xeeb0 -> U+92a7 -> 0xfbcc CJK Unified Ideograph | |
| 374 0xeeb1 -> U+9277 -> 0xfbcd CJK Unified Ideograph | |
| 375 0xeeb2 -> U+9278 -> 0xfbce CJK Unified Ideograph | |
| 376 0xeeb3 -> U+92e7 -> 0xfbcf CJK Unified Ideograph | |
| 377 0xeeb4 -> U+92d7 -> 0xfbd0 CJK Unified Ideograph | |
| 378 0xeeb5 -> U+92d9 -> 0xfbd1 CJK Unified Ideograph | |
| 379 0xeeb6 -> U+92d0 -> 0xfbd2 CJK Unified Ideograph | |
| 380 0xeeb7 -> U+fa27 -> 0xfbd3 CJK compatibility Ideograph | |
| 381 0xeeb8 -> U+92d5 -> 0xfbd4 CJK Unified Ideograph | |
| 382 0xeeb9 -> U+92e0 -> 0xfbd5 CJK Unified Ideograph | |
| 383 0xeeba -> U+92d3 -> 0xfbd6 CJK Unified Ideograph | |
| 384 0xeebb -> U+9325 -> 0xfbd7 CJK Unified Ideograph | |
| 385 0xeebc -> U+9321 -> 0xfbd8 CJK Unified Ideograph | |
| 386 0xeebd -> U+92fb -> 0xfbd9 CJK Unified Ideograph | |
| 387 0xeebe -> U+fa28 -> 0xfbda CJK compatibility Ideograph | |
| 388 0xeebf -> U+931e -> 0xfbdb CJK Unified Ideograph | |
| 389 0xeec0 -> U+92ff -> 0xfbdc CJK Unified Ideograph | |
| 390 0xeec1 -> U+931d -> 0xfbdd CJK Unified Ideograph | |
| 391 0xeec2 -> U+9302 -> 0xfbde CJK Unified Ideograph | |
| 392 0xeec3 -> U+9370 -> 0xfbdf CJK Unified Ideograph | |
| 393 0xeec4 -> U+9357 -> 0xfbe0 CJK Unified Ideograph | |
| 394 0xeec5 -> U+93a4 -> 0xfbe1 CJK Unified Ideograph | |
| 395 0xeec6 -> U+93c6 -> 0xfbe2 CJK Unified Ideograph | |
| 396 0xeec7 -> U+93de -> 0xfbe3 CJK Unified Ideograph | |
| 397 0xeec8 -> U+93f8 -> 0xfbe4 CJK Unified Ideograph | |
| 398 0xeec9 -> U+9431 -> 0xfbe5 CJK Unified Ideograph | |
| 399 0xeeca -> U+9445 -> 0xfbe6 CJK Unified Ideograph | |
| 400 0xeecb -> U+9448 -> 0xfbe7 CJK Unified Ideograph | |
| 401 0xeecc -> U+9592 -> 0xfbe8 CJK Unified Ideograph | |
| 402 0xeecd -> U+f9dc -> 0xfbe9 CJK compatibility Ideograph | |
| 403 0xeece -> U+fa29 -> 0xfbea CJK compatibility Ideograph | |
| 404 0xeecf -> U+969d -> 0xfbeb CJK Unified Ideograph | |
| 405 0xeed0 -> U+96af -> 0xfbec CJK Unified Ideograph | |
| 406 0xeed1 -> U+9733 -> 0xfbed CJK Unified Ideograph | |
| 407 0xeed2 -> U+973b -> 0xfbee CJK Unified Ideograph | |
| 408 0xeed3 -> U+9743 -> 0xfbef CJK Unified Ideograph | |
| 409 0xeed4 -> U+974d -> 0xfbf0 CJK Unified Ideograph | |
| 410 0xeed5 -> U+974f -> 0xfbf1 CJK Unified Ideograph | |
| 411 0xeed6 -> U+9751 -> 0xfbf2 CJK Unified Ideograph | |
| 412 0xeed7 -> U+9755 -> 0xfbf3 CJK Unified Ideograph | |
| 413 0xeed8 -> U+9857 -> 0xfbf4 CJK Unified Ideograph | |
| 414 0xeed9 -> U+9865 -> 0xfbf5 CJK Unified Ideograph | |
| 415 0xeeda -> U+fa2a -> 0xfbf6 CJK compatibility Ideograph | |
| 416 0xeedb -> U+fa2b -> 0xfbf7 CJK compatibility Ideograph | |
| 417 0xeedc -> U+9927 -> 0xfbf8 CJK Unified Ideograph | |
| 418 0xeedd -> U+fa2c -> 0xfbf9 CJK compatibility Ideograph | |
| 419 0xeede -> U+999e -> 0xfbfa CJK Unified Ideograph | |
| 420 0xeedf -> U+9a4e -> 0xfbfb CJK Unified Ideograph | |
| 421 0xeee0 -> U+9ad9 -> 0xfbfc CJK Unified Ideograph | |
| 422 0xeee1 -> U+9adc -> 0xfc40 CJK Unified Ideograph | |
| 423 0xeee2 -> U+9b75 -> 0xfc41 CJK Unified Ideograph | |
| 424 0xeee3 -> U+9b72 -> 0xfc42 CJK Unified Ideograph | |
| 425 0xeee4 -> U+9b8f -> 0xfc43 CJK Unified Ideograph | |
| 426 0xeee5 -> U+9bb1 -> 0xfc44 CJK Unified Ideograph | |
| 427 0xeee6 -> U+9bbb -> 0xfc45 CJK Unified Ideograph | |
| 428 0xeee7 -> U+9c00 -> 0xfc46 CJK Unified Ideograph | |
| 429 0xeee8 -> U+9d70 -> 0xfc47 CJK Unified Ideograph | |
| 430 0xeee9 -> U+9d6b -> 0xfc48 CJK Unified Ideograph | |
| 431 0xeeea -> U+fa2d -> 0xfc49 CJK compatibility Ideograph | |
| 432 0xeeeb -> U+9e19 -> 0xfc4a CJK Unified Ideograph | |
| 433 0xeeec -> U+9ed1 -> 0xfc4b CJK Unified Ideograph | |
| 434 0xeeef -> U+2170 -> 0xfa40 Small Roman Numeral One | |
| 435 0xeef0 -> U+2171 -> 0xfa41 Small Roman Numeral Two | |
| 436 0xeef1 -> U+2172 -> 0xfa42 Small Roman Numeral Three | |
| 437 0xeef2 -> U+2173 -> 0xfa43 Small Roman Numeral Four | |
| 438 0xeef3 -> U+2174 -> 0xfa44 Small Roman Numeral Five | |
| 439 0xeef4 -> U+2175 -> 0xfa45 Small Roman Numeral Six | |
| 440 0xeef5 -> U+2176 -> 0xfa46 Small Roman Numeral Seven | |
| 441 0xeef6 -> U+2177 -> 0xfa47 Small Roman Numeral Eight | |
| 442 0xeef7 -> U+2178 -> 0xfa48 Small Roman Numeral Nine | |
| 443 0xeef8 -> U+2179 -> 0xfa49 Small Roman Numeral Ten | |
| 444 0xeef9 -> U+ffe2 -> 0x81ca Fullwidth Not Sign | |
| 445 0xeefa -> U+ffe4 -> 0xfa55 Fullwidth Broken Bar | |
| 446 0xeefb -> U+ff07 -> 0xfa56 Fullwidth Apostrophe | |
| 447 0xeefc -> U+ff02 -> 0xfa57 Fullwidth Quotation Mark | |
| 448 0xfa4a -> U+2160 -> 0x8754 Roman Numeral One | |
| 449 0xfa4b -> U+2161 -> 0x8755 Roman Numeral Two | |
| 450 0xfa4c -> U+2162 -> 0x8756 Roman Numeral Three | |
| 451 0xfa4d -> U+2163 -> 0x8757 Roman Numeral Four | |
| 452 0xfa4e -> U+2164 -> 0x8758 Roman Numeral Five | |
| 453 0xfa4f -> U+2165 -> 0x8759 Roman Numeral Six | |
| 454 0xfa50 -> U+2166 -> 0x875a Roman Numeral Seven | |
| 455 0xfa51 -> U+2167 -> 0x875b Roman Numeral Eight | |
| 456 0xfa52 -> U+2168 -> 0x875c Roman Numeral Nine | |
| 457 0xfa53 -> U+2169 -> 0x875d Roman Numeral Ten | |
| 458 0xfa54 -> U+ffe2 -> 0x81ca Fullwidth Not Sign | |
| 459 0xfa58 -> U+3231 -> 0x878a Parenthesized Ideograph Stock | |
| 460 0xfa59 -> U+2116 -> 0x8782 Numero Sign | |
| 461 0xfa5a -> U+2121 -> 0x8784 Telephone Sign | |
| 462 0xfa5b -> U+2235 -> 0x81e6 Because")) | |
| 463 (with-input-from-string (s kb170559) | |
| 464 (loop for line = (read-line s nil) until (null line) | |
| 465 do (let ((ucs (parse-integer (subseq line 14 18) :radix 16)) | |
| 466 (cp932 (parse-integer (subseq line 26 30) :radix 16))) | |
| 467 (setf (gethash ucs *ucs-to-cp932-hash*) cp932))))) | |
| 468 | |
| 469 (defun eucjp-to-ucs (code) | |
| 470 (values (gethash code *eucjp-to-ucs-hash*))) | |
| 471 | |
| 472 (defun ucs-to-eucjp (code) | |
| 473 (values (gethash code *ucs-to-eucjp-hash*))) | |
| 474 | |
| 475 (defun cp932-to-ucs (code) | |
| 476 (values (gethash code *cp932-to-ucs-hash*))) | |
| 477 | |
| 478 (defun ucs-to-cp932 (code) | |
| 479 (values (gethash code *ucs-to-cp932-hash*))) | |
| 480 | |
| 481 ;;;; EUC-JP | |
| 482 | |
| 483 (define-character-encoding :eucjp | |
| 484 "An 8-bit, variable-length character encoding in which | |
| 485 character code points in the range #x00-#x7f can be encoded in a | |
| 486 single octet; characters with larger code values can be encoded | |
| 487 in 2 to 3 bytes." | |
| 488 :max-units-per-char 3 | |
| 489 :literal-char-code-limit #x80) | |
| 490 | |
| 491 | |
| 492 (define-octet-counter :eucjp (getter type) | |
| 493 `(named-lambda eucjp-octet-counter (seq start end max) | |
| 494 (declare (type ,type seq) (fixnum start end max)) | |
| 495 (loop with noctets fixnum = 0 | |
| 496 for i fixnum from start below end | |
| 497 for code of-type code-point = (,getter seq i) | |
| 498 do (let* ((c (ucs-to-eucjp code)) | |
| 499 (new (+ (cond ((< #xffff c) 3) | |
| 500 ((< #xff c) 2) | |
| 501 (t 1)) | |
| 502 noctets))) | |
| 503 (if (and (plusp max) (> new max)) | |
| 504 (loop-finish) | |
| 505 (setq noctets new))) | |
| 506 finally (return (values noctets i))))) | |
| 507 | |
| 508 (define-code-point-counter :eucjp (getter type) | |
| 509 `(named-lambda eucjp-code-point-counter (seq start end max) | |
| 510 (declare (type ,type seq) (fixnum start end max)) | |
| 511 (loop with nchars fixnum = 0 | |
| 512 with i fixnum = start | |
| 513 while (< i end) do | |
| 514 (let* ((octet (,getter seq i)) | |
| 515 (next-i (+ i (cond ((= #x8f octet) 3) | |
| 516 ((or (< #xa0 octet #xff) | |
| 517 (= #x8e octet)) 2) | |
| 518 (t 1))))) | |
| 519 (declare (type ub8 octet) (fixnum next-i)) | |
| 520 (cond ((> next-i end) | |
| 521 ;; Should we add restarts to this error, we'll have | |
| 522 ;; to figure out a way to communicate with the | |
| 523 ;; decoder since we probably want to do something | |
| 524 ;; about it right here when we have a chance to | |
| 525 ;; change the count or something. (Like an | |
| 526 ;; alternative replacement character or perhaps the | |
| 527 ;; existence of this error so that the decoder | |
| 528 ;; doesn't have to check for it on every iteration | |
| 529 ;; like we do.) | |
| 530 ;; | |
| 531 ;; FIXME: The data for this error is not right. | |
| 532 (decoding-error (vector octet) :eucjp seq i | |
| 533 nil 'end-of-input-in-character) | |
| 534 (return (values (1+ nchars) end))) | |
| 535 (t | |
| 536 (setq nchars (1+ nchars) | |
| 537 i next-i) | |
| 538 (when (and (plusp max) (= nchars max)) | |
| 539 (return (values nchars i)))))) | |
| 540 finally (progn (assert (= i end)) | |
| 541 (return (values nchars i)))))) | |
| 542 | |
| 543 (define-encoder :eucjp (getter src-type setter dest-type) | |
| 544 `(named-lambda eucjp-encoder (src start end dest d-start) | |
| 545 (declare (type ,src-type src) | |
| 546 (type ,dest-type dest) | |
| 547 (fixnum start end d-start)) | |
| 548 (loop with di fixnum = d-start | |
| 549 for i fixnum from start below end | |
| 550 for code of-type code-point = (,getter src i) | |
| 551 for eucjp of-type code-point | |
| 552 = (ucs-to-eucjp code) do | |
| 553 (macrolet ((set-octet (offset value) | |
| 554 `(,',setter ,value dest (the fixnum (+ di ,o… | |
| 555 (cond | |
| 556 ;; 1 octet | |
| 557 ((< eucjp #x100) | |
| 558 (set-octet 0 eucjp) | |
| 559 (incf di)) | |
| 560 ;; 2 octets | |
| 561 ((< eucjp #x10000) | |
| 562 (set-octet 0 (f-logand #xff (f-ash eucjp -8))) | |
| 563 (set-octet 1 (logand eucjp #xff)) | |
| 564 (incf di 2)) | |
| 565 ;; 3 octets | |
| 566 (t | |
| 567 (set-octet 0 (f-logand #xff (f-ash eucjp -16))) | |
| 568 (set-octet 1 (f-logand #xff (f-ash eucjp -8))) | |
| 569 (set-octet 2 (logand eucjp #xff)) | |
| 570 (incf di 3)) | |
| 571 )) | |
| 572 finally (return (the fixnum (- di d-start)))))) | |
| 573 | |
| 574 | |
| 575 (define-decoder :eucjp (getter src-type setter dest-type) | |
| 576 `(named-lambda eucjp-decoder (src start end dest d-start) | |
| 577 (declare (type ,src-type src) | |
| 578 (type ,dest-type dest) | |
| 579 (fixnum start end d-start)) | |
| 580 (let ((u2 0)) | |
| 581 (declare (type ub8 u2)) | |
| 582 (loop for di fixnum from d-start | |
| 583 for i fixnum from start below end | |
| 584 for u1 of-type ub8 = (,getter src i) do | |
| 585 ;; Note: CONSUME-OCTET doesn't check if I is being | |
| 586 ;; incremented past END. We're assuming that END has | |
| 587 ;; been calculated with the CODE-POINT-POINTER above that | |
| 588 ;; checks this. | |
| 589 (macrolet | |
| 590 ((consume-octet () | |
| 591 `(let ((next-i (incf i))) | |
| 592 (if (= next-i end) | |
| 593 ;; FIXME: data for this error is incomplete. | |
| 594 ;; and signalling this error twice | |
| 595 (return-from setter-block | |
| 596 (decoding-error nil :eucjp src i +repl+ | |
| 597 'end-of-input-in-characte… | |
| 598 (,',getter src next-i)))) | |
| 599 (handle-error (n &optional (c 'character-decoding-er… | |
| 600 `(decoding-error | |
| 601 (vector ,@(subseq '(u1 u2) 0 n)) | |
| 602 :eucjp src (1+ (- i ,n)) +repl+ ',c)) | |
| 603 (handle-error-if-icb (var n) | |
| 604 `(when (not (< #x7f ,var #xc0)) | |
| 605 (decf i) | |
| 606 (return-from setter-block | |
| 607 (handle-error ,n invalid-utf8-continuation-by… | |
| 608 (,setter | |
| 609 (block setter-block | |
| 610 (cond | |
| 611 ;; 3 octets | |
| 612 ((= u1 #x8f) | |
| 613 (setq u2 (consume-octet)) | |
| 614 (eucjp-to-ucs (logior #x8f0000 | |
| 615 (f-ash u2 8) | |
| 616 (consume-octet)))) | |
| 617 ;; 2 octets | |
| 618 ((or (= u1 #x8e) | |
| 619 (< #xa0 u1 #xff)) | |
| 620 (eucjp-to-ucs (logior (f-ash u1 8) | |
| 621 (consume-octet)))) | |
| 622 ;; 1 octet | |
| 623 (t | |
| 624 (eucjp-to-ucs u1)))) | |
| 625 dest di)) | |
| 626 finally (return (the fixnum (- di d-start))))))) | |
| 627 | |
| 628 ;;;; CP932 | |
| 629 | |
| 630 (define-character-encoding :cp932 | |
| 631 "An 8-bit, variable-length character encoding in which | |
| 632 character code points in the range #x00-#x7f can be encoded in a | |
| 633 single octet; characters with larger code values can be encoded | |
| 634 in 2 bytes." | |
| 635 :max-units-per-char 2 | |
| 636 :literal-char-code-limit #x80) | |
| 637 | |
| 638 | |
| 639 (define-octet-counter :cp932 (getter type) | |
| 640 `(named-lambda cp932-octet-counter (seq start end max) | |
| 641 (declare (type ,type seq) (fixnum start end max)) | |
| 642 (loop with noctets fixnum = 0 | |
| 643 for i fixnum from start below end | |
| 644 for code of-type code-point = (,getter seq i) | |
| 645 do (let* ((c (ucs-to-cp932 code)) | |
| 646 (new (+ (cond ((< #xff c) 2) | |
| 647 (t 1)) | |
| 648 noctets))) | |
| 649 (if (and (plusp max) (> new max)) | |
| 650 (loop-finish) | |
| 651 (setq noctets new))) | |
| 652 finally (return (values noctets i))))) | |
| 653 | |
| 654 (define-code-point-counter :cp932 (getter type) | |
| 655 `(named-lambda cp932-code-point-counter (seq start end max) | |
| 656 (declare (type ,type seq) (fixnum start end max)) | |
| 657 (loop with nchars fixnum = 0 | |
| 658 with i fixnum = start | |
| 659 while (< i end) do | |
| 660 (let* ((octet (,getter seq i)) | |
| 661 (next-i (+ i (cond ((or (<= #x81 octet #x9f) | |
| 662 (<= #xe0 octet #xfc)) | |
| 663 2) | |
| 664 (t 1))))) | |
| 665 (declare (type ub8 octet) (fixnum next-i)) | |
| 666 (cond ((> next-i end) | |
| 667 ;; Should we add restarts to this error, we'll have | |
| 668 ;; to figure out a way to communicate with the | |
| 669 ;; decoder since we probably want to do something | |
| 670 ;; about it right here when we have a chance to | |
| 671 ;; change the count or something. (Like an | |
| 672 ;; alternative replacement character or perhaps the | |
| 673 ;; existence of this error so that the decoder | |
| 674 ;; doesn't have to check for it on every iteration | |
| 675 ;; like we do.) | |
| 676 ;; | |
| 677 ;; FIXME: The data for this error is not right. | |
| 678 (decoding-error (vector octet) :cp932 seq i | |
| 679 nil 'end-of-input-in-character) | |
| 680 (return (values (1+ nchars) end))) | |
| 681 (t | |
| 682 (setq nchars (1+ nchars) | |
| 683 i next-i) | |
| 684 (when (and (plusp max) (= nchars max)) | |
| 685 (return (values nchars i)))))) | |
| 686 finally (progn (assert (= i end)) | |
| 687 (return (values nchars i)))))) | |
| 688 | |
| 689 (define-encoder :cp932 (getter src-type setter dest-type) | |
| 690 `(named-lambda cp932-encoder (src start end dest d-start) | |
| 691 (declare (type ,src-type src) | |
| 692 (type ,dest-type dest) | |
| 693 (fixnum start end d-start)) | |
| 694 (loop with di fixnum = d-start | |
| 695 for i fixnum from start below end | |
| 696 for code of-type code-point = (,getter src i) | |
| 697 for cp932 of-type code-point | |
| 698 = (ucs-to-cp932 code) do | |
| 699 (macrolet ((set-octet (offset value) | |
| 700 `(,',setter ,value dest (the fixnum (+ di ,o… | |
| 701 (cond | |
| 702 ;; 1 octet | |
| 703 ((< cp932 #x100) | |
| 704 (set-octet 0 cp932) | |
| 705 (incf di)) | |
| 706 ;; 2 octets | |
| 707 ((< cp932 #x10000) | |
| 708 (set-octet 0 (f-logand #xff (f-ash cp932 -8))) | |
| 709 (set-octet 1 (logand cp932 #xff)) | |
| 710 (incf di 2)) | |
| 711 ;; 3 octets | |
| 712 (t | |
| 713 (set-octet 0 (f-logand #xff (f-ash cp932 -16))) | |
| 714 (set-octet 1 (f-logand #xff (f-ash cp932 -8))) | |
| 715 (set-octet 2 (logand cp932 #xff)) | |
| 716 (incf di 3)) | |
| 717 )) | |
| 718 finally (return (the fixnum (- di d-start)))))) | |
| 719 | |
| 720 | |
| 721 (define-decoder :cp932 (getter src-type setter dest-type) | |
| 722 `(named-lambda cp932-decoder (src start end dest d-start) | |
| 723 (declare (type ,src-type src) | |
| 724 (type ,dest-type dest) | |
| 725 (fixnum start end d-start)) | |
| 726 (let ((u2 0)) | |
| 727 (declare (type ub8 u2)) | |
| 728 (loop for di fixnum from d-start | |
| 729 for i fixnum from start below end | |
| 730 for u1 of-type ub8 = (,getter src i) do | |
| 731 ;; Note: CONSUME-OCTET doesn't check if I is being | |
| 732 ;; incremented past END. We're assuming that END has | |
| 733 ;; been calculated with the CODE-POINT-POINTER above that | |
| 734 ;; checks this. | |
| 735 (macrolet | |
| 736 ((consume-octet () | |
| 737 `(let ((next-i (incf i))) | |
| 738 (if (= next-i end) | |
| 739 ;; FIXME: data for this error is incomplete. | |
| 740 ;; and signalling this error twice | |
| 741 (return-from setter-block | |
| 742 (decoding-error nil :cp932 src i +repl+ | |
| 743 'end-of-input-in-characte… | |
| 744 (,',getter src next-i)))) | |
| 745 (handle-error (n &optional (c 'character-decoding-er… | |
| 746 `(decoding-error | |
| 747 (vector ,@(subseq '(u1 u2) 0 n)) | |
| 748 :cp932 src (1+ (- i ,n)) +repl+ ',c)) | |
| 749 (handle-error-if-icb (var n) | |
| 750 `(when (not (< #x7f ,var #xc0)) | |
| 751 (decf i) | |
| 752 (return-from setter-block | |
| 753 (handle-error ,n invalid-utf8-continuation-by… | |
| 754 (,setter | |
| 755 (block setter-block | |
| 756 (cond | |
| 757 ;; 2 octets | |
| 758 ((or (<= #x81 u1 #x9f) | |
| 759 (<= #xe0 u1 #xfc)) | |
| 760 (setq u2 (consume-octet)) | |
| 761 (cp932-to-ucs (logior (f-ash u1 8) | |
| 762 u2))) | |
| 763 ;; 1 octet | |
| 764 (t | |
| 765 (cp932-to-ucs u1)))) | |
| 766 dest di)) | |
| 767 finally (return (the fixnum (- di d-start))))))) |