encodings.lisp - clic - Clic is an command line interactive client for gopher w… | |
git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
LICENSE | |
--- | |
encodings.lisp (22651B) | |
--- | |
1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*- | |
2 ;;; | |
3 ;;; encodings.lisp --- Character encodings and mappings. | |
4 ;;; | |
5 ;;; Copyright (C) 2007, Luis Oliveira <[email protected]> | |
6 ;;; | |
7 ;;; Permission is hereby granted, free of charge, to any person | |
8 ;;; obtaining a copy of this software and associated documentation | |
9 ;;; files (the "Software"), to deal in the Software without | |
10 ;;; restriction, including without limitation the rights to use, copy, | |
11 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies | |
12 ;;; of the Software, and to permit persons to whom the Software is | |
13 ;;; furnished to do so, subject to the following conditions: | |
14 ;;; | |
15 ;;; The above copyright notice and this permission notice shall be | |
16 ;;; included in all copies or substantial portions of the Software. | |
17 ;;; | |
18 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
19 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
20 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
21 ;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
23 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
24 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
25 ;;; DEALINGS IN THE SOFTWARE. | |
26 | |
27 (in-package #:babel-encodings) | |
28 | |
29 ;;;; Character Encodings | |
30 | |
31 (defclass character-encoding () | |
32 ((name :initarg :name :reader enc-name | |
33 :initform (error "Must specify a NAME for this character encodi… | |
34 ;; Most of these documentation strings are taken from OpenMCL. | |
35 (documentation | |
36 :initarg :documentation :reader enc-documentation :initform nil) | |
37 ;; A non-exhaustive list of aliases for the encoding. | |
38 (aliases :initarg :aliases :initform nil :reader enc-aliases) | |
39 ;; Specified in bits. Usually 8, 16 or 32. | |
40 (code-unit-size | |
41 :initarg :code-unit-size :reader enc-code-unit-size :initform 8) | |
42 (max-units-per-char | |
43 :initarg :max-units-per-char :reader enc-max-units-per-char :initfor… | |
44 ;; If NIL, it is necessary to swap 16- and 32-bit units. | |
45 (native-endianness | |
46 :initarg :native-endianness :reader enc-native-endianness :initform … | |
47 ;; Code units less than this value map to themselves on input. | |
48 (decode-literal-code-unit-limit | |
49 :initarg :decode-literal-code-unit-limit :initform 0 | |
50 :reader enc-decode-literal-code-unit-limit) | |
51 ;; Code points less than this value map to themselves on output. | |
52 (encode-literal-code-unit-limit | |
53 :initarg :encode-literal-code-unit-limit :initform 0 | |
54 :reader enc-encode-literal-code-unit-limit) | |
55 ;; Defines whether it is necessary to prepend a byte-order-mark to | |
56 ;; determine the endianness. | |
57 (use-bom :initarg :use-bom :initform nil :reader enc-use-bom) | |
58 ;; How the byte-order-mark should be encoded, specified as a | |
59 ;; sequence of octets. NIL if it cannot be encoded. | |
60 (bom-encoding | |
61 :initarg :bom-encoding :reader enc-bom-encoding :initform nil) | |
62 ;; How should NUL be encoded, specified as sequence of octets. | |
63 (nul-encoding | |
64 :initarg :nul-encoding :reader enc-nul-encoding :initform #(0)) | |
65 ;; Preferred replacement character code point. | |
66 (default-replacement | |
67 :initarg :default-replacement :reader enc-default-replacement | |
68 :initform #x1a) | |
69 ;; Does VALID-STRING => OCTETS => STRING2 guarantee a valid | |
70 ;; STRING2? UTF-{16,32} on little-endian plaforms don't because | |
71 ;; they assume different endianness on each direction. | |
72 (ambiguous | |
73 :initarg :ambiguous :reader ambiguous-encoding-p :initform nil))) | |
74 | |
75 ;;; I'm too lazy to write all the identical limits twice. | |
76 (defmethod initialize-instance :after ((enc character-encoding) | |
77 &key literal-char-code-limit) | |
78 (when literal-char-code-limit | |
79 (setf (slot-value enc 'encode-literal-code-unit-limit) | |
80 literal-char-code-limit) | |
81 (setf (slot-value enc 'decode-literal-code-unit-limit) | |
82 literal-char-code-limit))) | |
83 | |
84 #-(and) | |
85 (defmethod describe-object ((enc character-encoding) s) | |
86 "Prints out the name, aliases and documentation slots of a | |
87 character encoding object." | |
88 (with-slots (name aliases documentation) enc | |
89 (format s "~&~S" name) | |
90 (when aliases | |
91 (format s " [Aliases:~{ ~S~}]" aliases)) | |
92 (format s "~&~A~%~%" documentation)) | |
93 (call-next-method)) | |
94 | |
95 (defvar *supported-character-encodings* nil) | |
96 | |
97 (defun list-character-encodings () | |
98 "List of keyword symbols denoting supported character | |
99 encodings. This list does not include aliases." | |
100 *supported-character-encodings*) | |
101 | |
102 (defvar *character-encodings* (make-hash-table :test 'eq)) | |
103 | |
104 (defvar *default-character-encoding* :utf-8 | |
105 "Special variable used to determine the default character | |
106 encoding.") | |
107 | |
108 (defun get-character-encoding (name) | |
109 "Lookups the character encoding denoted by the keyword symbol | |
110 NAME. Signals an error if one is not found. If NAME is already | |
111 a CHARACTER-ENCONDING object, it is returned unmodified." | |
112 (when (typep name 'character-encoding) | |
113 (return-from get-character-encoding name)) | |
114 (when (eq name :default) | |
115 (setq name *default-character-encoding*)) | |
116 (or (gethash name *character-encodings*) | |
117 (error "Unknown character encoding: ~S" name))) | |
118 | |
119 (defmethod ambiguous-encoding-p ((encoding symbol)) | |
120 (ambiguous-encoding-p (get-character-encoding encoding))) | |
121 | |
122 (defun notice-character-encoding (enc) | |
123 (pushnew (enc-name enc) *supported-character-encodings*) | |
124 (dolist (kw (cons (enc-name enc) (enc-aliases enc))) | |
125 (setf (gethash kw *character-encodings*) enc)) | |
126 (enc-name enc)) | |
127 | |
128 (defmacro define-character-encoding (name docstring &body options) | |
129 `(notice-character-encoding | |
130 (make-instance 'character-encoding :name ,name ,@options | |
131 :documentation ,docstring))) | |
132 | |
133 ;;;; Mappings | |
134 | |
135 ;;; TODO: describe what mappings are | |
136 | |
137 (defun make-fixed-width-counter (getter type &optional (unit-size-in-bit… | |
138 (declare (ignore getter type)) | |
139 (check-type unit-size-in-bits positive-fixnum) | |
140 (let ((unit-size-in-bytes (/ unit-size-in-bits 8))) | |
141 `(named-lambda fixed-width-counter (seq start end max) | |
142 (declare (ignore seq) (fixnum start end max)) | |
143 ;; XXX: the result can be bigger than a fixnum when (> unit-size | |
144 ;; 1) and we don't want that to happen. Possible solution: signal | |
145 ;; a warning (hmm, make that an actual error) and truncate. | |
146 (if (plusp max) | |
147 (let ((count (the fixnum (min (floor max ,unit-size-in-bytes) | |
148 (the fixnum (- end start)))))) | |
149 (values (the fixnum (* count ,unit-size-in-bytes)) | |
150 (the fixnum (+ start count)))) | |
151 (values (the fixnum (* (the fixnum (- end start)) | |
152 ,unit-size-in-bytes)) | |
153 (the fixnum end)))))) | |
154 | |
155 ;;; Useful to develop new encodings incrementally starting with octet | |
156 ;;; and code-unit counters. | |
157 (defun make-dummy-coder (sg st ds dt) | |
158 (declare (ignore sg st ds dt)) | |
159 `(named-lambda dummy-coder (src s e dest i) | |
160 (declare (ignore src s e dest i)) | |
161 (error "this encoder/decoder hasn't been implemented yet"))) | |
162 | |
163 ;;; TODO: document here | |
164 ;;; | |
165 ;;; ENCODER -- (lambda (src-getter src-type dest-setter dest-type) ...) | |
166 ;;; DECODER -- (lambda (src-getter src-type dest-setter dest-type) ...) | |
167 ;;; | |
168 ;;; OCTET-COUNTER -- (lambda (getter type) ...) | |
169 ;;; CODE-POINT-COUNTER -- (lambda (getter type) ...) | |
170 (defclass abstract-mapping () | |
171 ((encoder-factory :accessor encoder-factory :initform 'make-dummy-code… | |
172 (decoder-factory :accessor decoder-factory :initform 'make-dummy-code… | |
173 (octet-counter-factory :accessor octet-counter-factory | |
174 :initform 'make-fixed-width-counter) | |
175 (code-point-counter-factory :accessor code-point-counter-factory | |
176 :initform 'make-fixed-width-counter))) | |
177 | |
178 ;;; TODO: document these | |
179 ;;; | |
180 ;;; ENCODER -- (lambda (src start end dest d-start) ...) | |
181 ;;; DECODER -- (lambda (src start end dest d-start) ...) | |
182 ;;; | |
183 ;;; OCTET-COUNTER -- (lambda (seq start end max-octets) ...) | |
184 ;;; CODE-POINT-COUNTER -- (lambda (seq start end max-chars) ...) | |
185 ;;; => N-CHARS NEW-END | |
186 ;;; (important: describe NEW-END) | |
187 (defclass concrete-mapping () | |
188 ((encoder :accessor encoder) | |
189 (decoder :accessor decoder) | |
190 (octet-counter :accessor octet-counter) | |
191 (code-point-counter :accessor code-point-counter))) | |
192 | |
193 (defparameter *abstract-mappings* (make-hash-table :test 'eq)) | |
194 | |
195 (defun get-abstract-mapping (encoding) | |
196 (gethash encoding *abstract-mappings*)) | |
197 | |
198 (defun (setf get-abstract-mapping) (value encoding) | |
199 (setf (gethash encoding *abstract-mappings*) value)) | |
200 | |
201 (defun %register-mapping-part (encoding slot-name fn) | |
202 (let ((mapping (get-abstract-mapping encoding))) | |
203 (unless mapping | |
204 (setq mapping (make-instance 'abstract-mapping)) | |
205 (setf (get-abstract-mapping encoding) mapping)) | |
206 (setf (slot-value mapping slot-name) fn))) | |
207 | |
208 ;;; See enc-*.lisp for example usages of these 4 macros. | |
209 | |
210 (defmacro define-encoder (encoding (sa st da dt) &body body) | |
211 `(%register-mapping-part ,encoding 'encoder-factory | |
212 (named-lambda encoder (,sa ,st ,da ,dt) | |
213 ,@body))) | |
214 | |
215 (defmacro define-decoder (encoding (sa st da dt) &body body) | |
216 `(%register-mapping-part ,encoding 'decoder-factory | |
217 (named-lambda decoder (,sa ,st ,da ,dt) | |
218 ,@body))) | |
219 | |
220 (defmacro define-octet-counter (encoding (acc type) &body body) | |
221 `(%register-mapping-part ,encoding 'octet-counter-factory | |
222 (named-lambda octet-counter-factory (,acc ,ty… | |
223 ,@body))) | |
224 | |
225 (defmacro define-code-point-counter (encoding (acc type) &body body) | |
226 `(%register-mapping-part ,encoding 'code-point-counter-factory | |
227 (named-lambda code-point-counter (,acc ,type) | |
228 ,@body))) | |
229 | |
230 (defun instantiate-encoder (encoding am octet-seq-getter octet-seq-type | |
231 code-point-seq-setter code-point-seq-type) | |
232 (declare (ignore encoding)) | |
233 (funcall (encoder-factory am) | |
234 octet-seq-getter | |
235 octet-seq-type | |
236 code-point-seq-setter | |
237 code-point-seq-type)) | |
238 | |
239 (defun instantiate-decoder (encoding am octet-seq-getter octet-seq-type | |
240 code-point-seq-setter code-point-seq-type) | |
241 (declare (ignore encoding)) | |
242 (funcall (decoder-factory am) | |
243 octet-seq-getter | |
244 octet-seq-type | |
245 code-point-seq-setter | |
246 code-point-seq-type)) | |
247 | |
248 (defun instantiate-code-point-counter (encoding am octet-seq-getter | |
249 octet-seq-type) | |
250 (declare (ignore encoding)) | |
251 (funcall (code-point-counter-factory am) | |
252 octet-seq-getter | |
253 octet-seq-type)) | |
254 | |
255 (defun instantiate-octet-counter (encoding am code-point-seq-getter | |
256 code-point-seq-type) | |
257 (if (= 1 (enc-max-units-per-char encoding)) | |
258 (make-fixed-width-counter code-point-seq-getter code-point-seq-type | |
259 (enc-code-unit-size encoding)) | |
260 (funcall (octet-counter-factory am) | |
261 code-point-seq-getter | |
262 code-point-seq-type))) | |
263 | |
264 ;;; Expands into code generated by the available abstract mappings | |
265 ;;; that will be compiled into concrete mappings. This is used in | |
266 ;;; e.g. strings.lisp to define mappings between strings and | |
267 ;;; (unsigned-byte 8) vectors. | |
268 ;;; | |
269 ;;; For each encoding funcall the abstract mappings at macro-expansion | |
270 ;;; time with the src/dest accessors and types to generate the | |
271 ;;; appropriate code for the concrete mappings. These functions are | |
272 ;;; then saved in their respective slots of the CONCRETE-MAPPING | |
273 ;;; object. | |
274 (defmacro instantiate-concrete-mappings | |
275 (&key (encodings (hash-table-keys *abstract-mappings*)) | |
276 (optimize '((speed 3) (debug 0) (compilation-speed 0))) | |
277 octet-seq-getter octet-seq-setter octet-seq-type | |
278 code-point-seq-getter code-point-seq-setter code-point-seq-type | |
279 (instantiate-decoders t)) | |
280 `(let ((ht (make-hash-table :test 'eq))) | |
281 (declare (optimize ,@optimize) | |
282 #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) | |
283 (flet ((notice-mapping (encoding-name cm) | |
284 (let* ((encoding (get-character-encoding encoding-name)) | |
285 (aliases (enc-aliases encoding))) | |
286 (dolist (kw (cons (enc-name encoding) aliases)) | |
287 (setf (gethash kw ht) cm))))) | |
288 ,@(loop for encoding-name in encodings | |
289 for encoding = (get-character-encoding encoding-name) | |
290 for am = (gethash encoding-name *abstract-mappings*) | |
291 collect | |
292 `(let ((cm (make-instance 'concrete-mapping))) | |
293 (setf (encoder cm) | |
294 ,(instantiate-encoder encoding am | |
295 code-point-seq-getter | |
296 code-point-seq-type | |
297 octet-seq-setter | |
298 octet-seq-type)) | |
299 ,(when instantiate-decoders | |
300 `(progn | |
301 (setf (decoder cm) | |
302 ,(instantiate-decoder encoding am | |
303 octet-seq-getter | |
304 octet-seq-type | |
305 code-point-seq-setter | |
306 code-point-seq-type)) | |
307 (setf (code-point-counter cm) | |
308 ,(instantiate-code-point-counter | |
309 encoding am octet-seq-getter octet-seq-t… | |
310 (setf (octet-counter cm) | |
311 ,(instantiate-octet-counter encoding am | |
312 code-point-seq-getter | |
313 code-point-seq-type)) | |
314 (notice-mapping ,encoding-name cm)))) | |
315 ht)) | |
316 | |
317 ;;; debugging stuff | |
318 | |
319 #-(and) | |
320 (defun pprint-instantiate-concrete-mappings | |
321 (&key (encodings (hash-table-keys *abstract-mappings*)) | |
322 (optimize '((debug 3) (safety 3))) | |
323 (octet-seq-setter 'ub-set) (octet-seq-getter 'ub-get) | |
324 (octet-seq-type '(simple-array (unsigned-byte 8) (*))) | |
325 (code-point-seq-setter 'string-set) | |
326 (code-point-seq-getter 'string-get) | |
327 (code-point-seq-type 'simple-unicode-string)) | |
328 (let ((encodings (ensure-list encodings)) | |
329 (*package* (find-package :babel-encodings)) | |
330 (*print-case* :downcase)) | |
331 (pprint | |
332 (macroexpand | |
333 `(instantiate-concrete-mappings | |
334 :encodings ,encodings | |
335 :optimize ,optimize | |
336 :octet-seq-getter ,octet-seq-getter | |
337 :octet-seq-setter ,octet-seq-setter | |
338 :octet-seq-type ,octet-seq-type | |
339 :code-point-seq-getter ,code-point-seq-getter | |
340 :code-point-seq-setter ,code-point-seq-setter | |
341 :code-point-seq-type ,code-point-seq-type)))) | |
342 (values)) | |
343 | |
344 ;;;; Utilities used in enc-*.lisp | |
345 | |
346 (defconstant +default-substitution-code-point+ #x1a | |
347 "Default ASCII substitution character code point used in case of an en… | |
348 | |
349 ;;; We're converting between objects of the (UNSIGNED-BYTE 8) and | |
350 ;;; (MOD #x110000) types which are aliased here to UB8 and CODE-POINT | |
351 ;;; for convenience. | |
352 (deftype ub8 () '(unsigned-byte 8)) | |
353 (deftype code-point () '(mod #x110000)) | |
354 | |
355 ;;; Utility macro around DEFINE-ENCODER that takes care of most of the | |
356 ;;; work need to deal with an 8-bit, fixed-width character encoding. | |
357 ;;; | |
358 ;;; BODY will be inside a loop and its return value will placed in the | |
359 ;;; destination buffer. BODY will be surounded by lexical BLOCK which | |
360 ;;; will have the ENCODING's name, usually a keyword. It handles all | |
361 ;;; sorts of type declarations. | |
362 ;;; | |
363 ;;; See enc-ascii.lisp for a simple usage example. | |
364 (defmacro define-unibyte-encoder (encoding (code) &body body) | |
365 (with-unique-names (s-getter s-type d-setter d-type | |
366 src start end dest d-start i di) | |
367 `(define-encoder ,encoding (,s-getter ,s-type ,d-setter ,d-type) | |
368 `(named-lambda ,',(symbolicate encoding '#:-unibyte-encoder) | |
369 (,',src ,',start ,',end ,',dest ,',d-start) | |
370 (declare (type ,,s-type ,',src) | |
371 (type ,,d-type ,',dest) | |
372 (fixnum ,',start ,',end ,',d-start)) | |
373 (loop for ,',i fixnum from ,',start below ,',end | |
374 and ,',di fixnum from ,',d-start do | |
375 (,,d-setter | |
376 (macrolet | |
377 ;; this should probably be a function... | |
378 ((handle-error (&optional (c ''character-encoding-e… | |
379 `(encoding-error | |
380 ,',',code ,',',encoding ,',',src ,',',i | |
381 +default-substitution-code-point+ ,c))) | |
382 (let ((,',code (,,s-getter ,',src ,',i))) | |
383 (declare (type code-point ,',code)) | |
384 (block ,',encoding ,@',body))) | |
385 ,',dest ,',di) | |
386 finally (return (the fixnum (- ,',di ,',d-start)))))))) | |
387 | |
388 ;;; The decoder version of the above macro. | |
389 (defmacro define-unibyte-decoder (encoding (octet) &body body) | |
390 (with-unique-names (s-getter s-type d-setter d-type | |
391 src start end dest d-start i di) | |
392 `(define-decoder ,encoding (,s-getter ,s-type ,d-setter ,d-type) | |
393 `(named-lambda ,',(symbolicate encoding '#:-unibyte-encoder) | |
394 (,',src ,',start ,',end ,',dest ,',d-start) | |
395 (declare (type ,,s-type ,',src) | |
396 (type ,,d-type ,',dest) | |
397 (fixnum ,',start ,',end ,',d-start)) | |
398 (loop for ,',i fixnum from ,',start below ,',end | |
399 and ,',di fixnum from ,',d-start do | |
400 (,,d-setter | |
401 (macrolet | |
402 ;; this should probably be a function... | |
403 ((handle-error (&optional (c ''character-decoding-e… | |
404 `(decoding-error | |
405 (vector ,',',octet) ,',',encoding ,',',src ,',… | |
406 +default-substitution-code-point+ ,c))) | |
407 (let ((,',octet (,,s-getter ,',src ,',i))) | |
408 (declare (type ub8 ,',octet)) | |
409 (block ,',encoding ,@',body))) | |
410 ,',dest ,',di) | |
411 finally (return (the fixnum (- ,',di ,',d-start)))))))) | |
412 | |
413 ;;;; Error Conditions | |
414 ;;; | |
415 ;;; For now, we don't define any actual restarts. The only mechanism | |
416 ;;; for "restarting" a coding error is the | |
417 ;;; *SUPPRESS-CHARACTER-CODING-ERRORS* special variable which, when | |
418 ;;; bound to T (the default), suppresses any error and uses a default | |
419 ;;; replacement character instead. | |
420 ;;; | |
421 ;;; If it turns out that other more options are necessary, possible | |
422 ;;; alternative approaches include: | |
423 ;;; | |
424 ;;; a) use a *REPLACEMENT-CHARACTER* special variable that lets us | |
425 ;;; pick our own replacement character. The encoder must do | |
426 ;;; additional work to check if this is character is encodable. | |
427 ;;; | |
428 ;;; b) offer a restart to pick a replacement character. Same | |
429 ;;; problem as above. | |
430 ;;; | |
431 ;;; Both approaches pose encoding problems when dealing with a | |
432 ;;; variable-width encodings because different replacement characters | |
433 ;;; will need different numbers of octets. This is not a problem for | |
434 ;;; UTF but will be a problem for the CJK charsets. Approach (a) is | |
435 ;;; nevertheless easier since the replacement character is known in | |
436 ;;; advance and therefore the octet-counter can account for it. | |
437 ;;; | |
438 ;;; For more complex restarts like SBCL's -- that'll let you specify | |
439 ;;; _several_ replacement characters for a single character error -- | |
440 ;;; will probably need extra support code outside the encoder/decoder | |
441 ;;; (i.e. in the string-to-octets function, for example) since the | |
442 ;;; encoders/decoders deal with pre-allocated fixed-length buffers. | |
443 ;;; | |
444 ;;; SBCL has ASCII-specific (MALFORMED-ASCII) and UTF8-specific | |
445 ;;; errors. Why? Do we want to add some of those too? | |
446 | |
447 ;;; FIXME: We used to deal with this with an extra ERRORP argument for | |
448 ;;; encoders, decoders, etc... Still undecided on the best way to do | |
449 ;;; it. We could also use a simple restart instead of this... | |
450 ;;; | |
451 ;;; In any case, this is not for the users to bind and it's not | |
452 ;;; exported from the BABEL package. | |
453 (defvar *suppress-character-coding-errors* nil | |
454 "If non-NIL, encoding or decoding errors are suppressed and the | |
455 the current character encoding's default replacement character is | |
456 used.") | |
457 | |
458 ;;; All of Babel's error conditions are subtypes of | |
459 ;;; CHARACTER-CODING-ERROR. This error hierarchy is based on SBCL's. | |
460 (define-condition character-coding-error (error) | |
461 ((buffer :initarg :buffer :reader character-coding-error-buffer) | |
462 (position :initarg :position :reader character-coding-error-position) | |
463 (encoding :initarg :encoding :reader character-coding-error-encoding)… | |
464 | |
465 (define-condition character-encoding-error (character-coding-error) | |
466 ((code :initarg :code :reader character-encoding-error-code)) | |
467 (:report (lambda (c s) | |
468 (format s "Unable to encode character code point ~A as ~S." | |
469 (character-encoding-error-code c) | |
470 (character-coding-error-encoding c))))) | |
471 | |
472 (declaim (inline encoding-error)) | |
473 (defun encoding-error (code enc buf pos &optional | |
474 (sub +default-substitution-code-point+) | |
475 (e 'character-encoding-error)) | |
476 (unless *suppress-character-coding-errors* | |
477 (error e :encoding enc :buffer buf :position pos :code code)) | |
478 sub) | |
479 | |
480 (define-condition character-decoding-error (character-coding-error) | |
481 ((octets :initarg :octets :reader character-decoding-error-octets)) | |
482 (:report (lambda (c s) | |
483 (format s "Illegal ~S character starting at position ~D." | |
484 (character-coding-error-encoding c) | |
485 (character-coding-error-position c))))) | |
486 | |
487 (define-condition end-of-input-in-character (character-decoding-error) | |
488 () | |
489 (:documentation "Signalled by DECODERs or CODE-POINT-COUNTERs | |
490 of variable-width character encodings.")) | |
491 | |
492 (define-condition character-out-of-range (character-decoding-error) | |
493 () | |
494 (:documentation | |
495 "Signalled when the character being decoded is out of range.")) | |
496 | |
497 (declaim (inline decoding-error)) | |
498 (defun decoding-error (octets enc buf pos &optional | |
499 (sub +default-substitution-code-point+) | |
500 (e 'character-decoding-error)) | |
501 (unless *suppress-character-coding-errors* | |
502 (error e :octets octets :encoding enc :buffer buf :position pos)) | |
503 sub) |