tests.lisp - clic - Clic is an command line interactive client for gopher writt… | |
git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
LICENSE | |
--- | |
tests.lisp (44012B) | |
--- | |
1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*- | |
2 ;;; | |
3 ;;; tests.lisp --- Unit and regression tests for Babel. | |
4 ;;; | |
5 ;;; Copyright (C) 2007-2009, Luis Oliveira <[email protected]> | |
6 ;;; | |
7 ;;; Permission is hereby granted, free of charge, to any person | |
8 ;;; obtaining a copy of this software and associated documentation | |
9 ;;; files (the "Software"), to deal in the Software without | |
10 ;;; restriction, including without limitation the rights to use, copy, | |
11 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies | |
12 ;;; of the Software, and to permit persons to whom the Software is | |
13 ;;; furnished to do so, subject to the following conditions: | |
14 ;;; | |
15 ;;; The above copyright notice and this permission notice shall be | |
16 ;;; included in all copies or substantial portions of the Software. | |
17 ;;; | |
18 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
19 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
20 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
21 ;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
23 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
24 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
25 ;;; DEALINGS IN THE SOFTWARE. | |
26 | |
27 (in-package #:cl-user) | |
28 (defpackage #:babel-tests | |
29 (:use #:common-lisp #:babel #:babel-encodings #:hu.dwim.stefil) | |
30 (:import-from #:alexandria #:ignore-some-conditions) | |
31 (:export #:run)) | |
32 (in-package #:babel-tests) | |
33 | |
34 (defun indented-format (level stream format-control &rest format-argumen… | |
35 (let ((line-prefix (make-string level :initial-element #\Space))) | |
36 (let ((output (format nil "~?~%" format-control format-arguments))) | |
37 (with-input-from-string (s output) | |
38 (loop for line = (read-line s nil nil) until (null line) | |
39 do (format stream "~A~A~%" line-prefix line)))))) | |
40 | |
41 ;; adapted from https://github.com/luismbo/stefil/blob/master/source/sui… | |
42 (defun describe-failed-tests (&key (result *last-test-result*) (stream t… | |
43 "Prints out a report for RESULT in STREAM. | |
44 | |
45 RESULT defaults to `*last-test-result*' and STREAM defaults to t" | |
46 (let ((descs (hu.dwim.stefil::failure-descriptions-of result))) | |
47 (cond ((zerop (length descs)) | |
48 (format stream "~&~%[no failures!]")) | |
49 (t | |
50 (format stream "~&~%Test failures:~%") | |
51 (dotimes (i (length descs)) | |
52 (let ((desc (aref descs i)) | |
53 format-control format-arguments) | |
54 ;; XXX: most of Stefil's conditions specialise DESCRIBE-O… | |
55 ;; with nice human-readable messages. We should add any m… | |
56 ;; ones (like UNEXPECTED-ERROR) and ditch this code. | |
57 (etypecase desc | |
58 (hu.dwim.stefil::unexpected-error | |
59 (let ((c (hu.dwim.stefil::condition-of desc))) | |
60 (typecase c | |
61 (simple-condition | |
62 (setf format-control (simple-condition-format-con… | |
63 (setf format-arguments | |
64 (simple-condition-format-arguments c))) | |
65 (t | |
66 (setf format-control "~S" | |
67 format-arguments (list c)))))) | |
68 (hu.dwim.stefil::failed-assertion | |
69 (setf format-control (hu.dwim.stefil::format-control-o… | |
70 format-arguments (hu.dwim.stefil::format-argumen… | |
71 (hu.dwim.stefil::missing-condition | |
72 (setf format-control "~A" | |
73 format-arguments (list (with-output-to-string (s… | |
74 (describe desc stream))… | |
75 (null | |
76 (setf format-control "Test succeeded!"))) | |
77 (format stream "~%Failure ~A: ~A when running ~S~%~%" | |
78 (1+ i) | |
79 (type-of desc) | |
80 (hu.dwim.stefil::name-of (hu.dwim.stefil::test-of… | |
81 (indented-format 4 stream "~?" format-control format-argu… | |
82 | |
83 (defun run () | |
84 (let ((test-run (without-debugging (babel-tests)))) | |
85 (print test-run) | |
86 (describe-failed-tests :result test-run) | |
87 (values (zerop (length (hu.dwim.stefil::failure-descriptions-of test… | |
88 test-run))) | |
89 | |
90 (defsuite* (babel-tests :in root-suite)) | |
91 | |
92 (defun ub8v (&rest contents) | |
93 (make-array (length contents) :element-type '(unsigned-byte 8) | |
94 :initial-contents contents)) | |
95 | |
96 (defun make-ub8-vector (size) | |
97 (make-array size :element-type '(unsigned-byte 8) | |
98 :initial-element 0)) | |
99 | |
100 (defmacro returns (form &rest values) | |
101 "Asserts, through EQUALP, that FORM returns VALUES." | |
102 `(is (equalp (multiple-value-list ,form) (list ,@values)))) | |
103 | |
104 (defmacro defstest (name form &body return-values) | |
105 "Similar to RT's DEFTEST." | |
106 `(deftest ,name () | |
107 (returns ,form ,@(mapcar (lambda (x) `',x) return-values)))) | |
108 | |
109 (defun fail (control-string &rest arguments) | |
110 (hu.dwim.stefil::record/failure 'hu.dwim.stefil::failed-assertion | |
111 :format-control control-string | |
112 :format-arguments arguments)) | |
113 | |
114 (defun expected (expected &key got) | |
115 (fail "expected ~A, got ~A instead" expected got)) | |
116 | |
117 (enable-sharp-backslash-syntax) | |
118 | |
119 ;;;; Simple tests using ASCII | |
120 | |
121 (defstest enc.ascii.1 | |
122 (string-to-octets "abc" :encoding :ascii) | |
123 #(97 98 99)) | |
124 | |
125 (defstest enc.ascii.2 | |
126 (string-to-octets (string #\uED) :encoding :ascii :errorp nil) | |
127 #(#x1a)) | |
128 | |
129 (deftest enc.ascii.3 () | |
130 (handler-case | |
131 (string-to-octets (string #\uED) :encoding :ascii :errorp t) | |
132 (character-encoding-error (c) | |
133 (is (eql 0 (character-coding-error-position c))) | |
134 (is (eq :ascii (character-coding-error-encoding c))) | |
135 (is (eql #xed (character-encoding-error-code c)))) | |
136 (:no-error (result) | |
137 (expected 'character-encoding-error :got result)))) | |
138 | |
139 (defstest dec.ascii.1 | |
140 (octets-to-string (ub8v 97 98 99) :encoding :ascii) | |
141 "abc") | |
142 | |
143 (deftest dec.ascii.2 () | |
144 (handler-case | |
145 (octets-to-string (ub8v 97 128 99) :encoding :ascii :errorp t) | |
146 (character-decoding-error (c) | |
147 (is (equalp #(128) (character-decoding-error-octets c))) | |
148 (is (eql 1 (character-coding-error-position c))) | |
149 (is (eq :ascii (character-coding-error-encoding c)))) | |
150 (:no-error (result) | |
151 (expected 'character-decoding-error :got result)))) | |
152 | |
153 (defstest dec.ascii.3 | |
154 (octets-to-string (ub8v 97 255 98 99) :encoding :ascii :errorp nil) | |
155 #(#\a #\Sub #\b #\c)) | |
156 | |
157 (defstest oct-count.ascii.1 | |
158 (string-size-in-octets "abc" :encoding :ascii) | |
159 3 3) | |
160 | |
161 (defstest char-count.ascii.1 | |
162 (vector-size-in-chars (ub8v 97 98 99) :encoding :ascii) | |
163 3 3) | |
164 | |
165 ;;;; UTF-8 | |
166 | |
167 (defstest char-count.utf-8.1 | |
168 ;; "ni hao" in hanzi with the last octet missing | |
169 (vector-size-in-chars (ub8v 228 189 160 229 165) :errorp nil) | |
170 2 5) | |
171 | |
172 (deftest char-count.utf-8.2 () | |
173 ;; same as above with the last 2 octets missing | |
174 (handler-case | |
175 (vector-size-in-chars (ub8v 228 189 160 229) :errorp t) | |
176 (end-of-input-in-character (c) | |
177 (is (equalp #(229) (character-decoding-error-octets c))) | |
178 (is (eql 3 (character-coding-error-position c))) | |
179 (is (eq :utf-8 (character-coding-error-encoding c)))) | |
180 (:no-error (result) | |
181 (expected 'end-of-input-in-character :got result)))) | |
182 | |
183 ;;; Lispworks bug? | |
184 ;; #+lispworks | |
185 ;; (pushnew 'dec.utf-8.1 rtest::*expected-failures*) | |
186 | |
187 (defstest dec.utf-8.1 | |
188 (octets-to-string (ub8v 228 189 160 229) :errorp nil) | |
189 #(#\u4f60 #\ufffd)) | |
190 | |
191 (deftest dec.utf-8.2 () | |
192 (handler-case | |
193 (octets-to-string (ub8v 228 189 160 229) :errorp t) | |
194 (end-of-input-in-character (c) | |
195 (is (equalp #(229) (character-decoding-error-octets c))) | |
196 (is (eql 3 (character-coding-error-position c))) | |
197 (is (eq :utf-8 (character-coding-error-encoding c)))) | |
198 (:no-error (result) | |
199 (expected 'end-of-input-in-character :got result)))) | |
200 | |
201 ;;;; UTF-16 | |
202 | |
203 ;;; Test that the BOM is not being counted as a character. | |
204 (deftest char-count.utf-16.bom () | |
205 (is (eql (vector-size-in-chars (ub8v #xfe #xff #x00 #x55 #x00 #x54 #x0… | |
206 :encoding :utf-16) | |
207 3)) | |
208 (is (eql (vector-size-in-chars (ub8v #xff #xfe #x00 #x55 #x00 #x54 #x0… | |
209 :encoding :utf-16) | |
210 3))) | |
211 | |
212 ;;;; UTF-32 | |
213 | |
214 ;;; RT: check that UTF-32 characters without a BOM are treated as | |
215 ;;; little-endian. | |
216 (deftest endianness.utf-32.no-bom () | |
217 (is (string= "a" (octets-to-string (ub8v 0 0 0 97) :encoding :utf-32))… | |
218 | |
219 ;;;; MORE TESTS | |
220 | |
221 (defparameter *standard-characters* | |
222 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'()… | |
223 | |
224 ;;; Testing consistency by encoding and decoding a simple string for | |
225 ;;; all character encodings. | |
226 (deftest rw-equiv.1 () | |
227 (let ((compatible-encodings (remove :ebcdic-international (list-charac… | |
228 (dolist (*default-character-encoding* compatible-encodings) | |
229 (let ((octets (string-to-octets *standard-characters*))) | |
230 (is (string= (octets-to-string octets) *standard-characters*))))… | |
231 | |
232 ;;; FIXME: assumes little-endianness. Easily fixable when we | |
233 ;;; implement the BE and LE variants of :UTF-16. | |
234 (deftest concatenate-strings-to-octets-equiv.1 () | |
235 (let ((foo (octets-to-string (ub8v 102 195 186 195 186) | |
236 :encoding :utf-8)) | |
237 (bar (octets-to-string (ub8v 98 195 161 114) | |
238 :encoding :utf-8))) | |
239 ;; note: FOO and BAR are not ascii | |
240 (is (equalp (concatenate-strings-to-octets :utf-8 foo bar) | |
241 (ub8v 102 195 186 195 186 98 195 161 114))) | |
242 (is (equalp (concatenate-strings-to-octets :utf-16 foo bar) | |
243 (ub8v 102 0 250 0 250 0 98 0 225 0 114 0))))) | |
244 | |
245 ;;;; Testing against files generated by GNU iconv. | |
246 | |
247 (defun test-file (name type) | |
248 (uiop:subpathname (asdf:system-relative-pathname "babel-tests" "tests/… | |
249 name :type type)) | |
250 | |
251 (defun read-test-file (name type) | |
252 (with-open-file (in (test-file name type) :element-type '(unsigned-byt… | |
253 (let* ((data (loop for byte = (read-byte in nil nil) | |
254 until (null byte) collect byte))) | |
255 (make-array (length data) :element-type '(unsigned-byte 8) | |
256 :initial-contents data)))) | |
257 | |
258 (deftest test-encoding (enc &optional input-enc-name) | |
259 (let* ((*default-character-encoding* enc) | |
260 (enc-name (string-downcase (symbol-name enc))) | |
261 (utf8-octets (read-test-file enc-name "txt-utf8")) | |
262 (foo-octets (read-test-file (or input-enc-name enc-name) "txt")) | |
263 (utf8-string (octets-to-string utf8-octets :encoding :utf-8 :er… | |
264 (foo-string (octets-to-string foo-octets :errorp t))) | |
265 (is (string= utf8-string foo-string)) | |
266 (is (= (length foo-string) (vector-size-in-chars foo-octets :errorp … | |
267 (unless (member enc '(:utf-16 :utf-32)) | |
268 ;; FIXME: skipping UTF-16 and UTF-32 because of the BOMs and | |
269 ;; because the input might not be in native-endian order so the | |
270 ;; comparison will fail there. | |
271 (let ((new-octets (string-to-octets foo-string :errorp t))) | |
272 (is (equalp new-octets foo-octets)) | |
273 (is (eql (length foo-octets) | |
274 (string-size-in-octets foo-string :errorp t))))))) | |
275 | |
276 (deftest iconv-test () | |
277 (dolist (enc '(:ascii :ebcdic-us :utf-8 :utf-16 :utf-32)) | |
278 (case enc | |
279 (:utf-16 (test-encoding :utf-16 "utf-16-with-le-bom")) | |
280 (:utf-32 (test-encoding :utf-32 "utf-32-with-le-bom"))) | |
281 (test-encoding enc))) | |
282 | |
283 ;;; RT: accept encoding objects in LOOKUP-MAPPING etc. | |
284 (defstest encoding-objects.1 | |
285 (string-to-octets "abc" :encoding (get-character-encoding :ascii)) | |
286 #(97 98 99)) | |
287 | |
288 (defmacro with-sharp-backslash-syntax (&body body) | |
289 `(let ((*readtable* (copy-readtable *readtable*))) | |
290 (set-sharp-backslash-syntax-in-readtable) | |
291 ,@body)) | |
292 | |
293 (defstest sharp-backslash.1 | |
294 (with-sharp-backslash-syntax | |
295 (loop for string in '("#\\a" "#\\u" "#\\ued") | |
296 collect (char-code (read-from-string string)))) | |
297 (97 117 #xed)) | |
298 | |
299 (deftest sharp-backslash.2 () | |
300 (signals reader-error (with-sharp-backslash-syntax | |
301 (read-from-string "#\\u12zz")))) | |
302 | |
303 (deftest test-read-from-string (string object position) | |
304 "Test that (read-from-string STRING) returns values OBJECT and POSITIO… | |
305 (multiple-value-bind (obj pos) | |
306 (read-from-string string) | |
307 (is (eql object obj)) | |
308 (is (eql position pos)))) | |
309 | |
310 ;;; RT: our #\ reader didn't honor *READ-SUPPRESS*. | |
311 (deftest sharp-backslash.3 () | |
312 (with-sharp-backslash-syntax | |
313 (let ((*read-suppress* t)) | |
314 (test-read-from-string "#\\ujunk" nil 7) | |
315 (test-read-from-string "#\\u12zz" nil 7)))) | |
316 | |
317 ;;; RT: the slow implementation of with-simple-vector was buggy. | |
318 (defstest string-to-octets.1 | |
319 (code-char (aref (string-to-octets "abc" :start 1 :end 2) 0)) | |
320 #\b) | |
321 | |
322 (defstest simple-base-string.1 | |
323 (string-to-octets (coerce "abc" 'base-string) :encoding :ascii) | |
324 #(97 98 99)) | |
325 | |
326 ;;; For now, disable this tests for Lisps that are strict about | |
327 ;;; non-character code points. In the future, simply mark them as | |
328 ;;; expected failures. | |
329 #-(or abcl ccl) | |
330 (progn | |
331 (defstest utf-8b.1 | |
332 (string-to-octets (coerce #(#\a #\b #\udcf0) 'unicode-string) | |
333 :encoding :utf-8b) | |
334 #(97 98 #xf0)) | |
335 | |
336 #+#:temporarily-disabled | |
337 (defstest utf-8b.2 | |
338 (octets-to-string (ub8v 97 98 #xcd) :encoding :utf-8b) | |
339 #(#\a #\b #\udccd)) | |
340 | |
341 (defstest utf-8b.3 | |
342 (octets-to-string (ub8v 97 #xf0 #xf1 #xff #x01) :encoding :utf-8b) | |
343 #(#\a #\udcf0 #\udcf1 #\udcff #\udc01)) | |
344 | |
345 (deftest utf-8b.4 () | |
346 (let* ((octets (coerce (loop repeat 8192 collect (random (+ #x82))) | |
347 '(array (unsigned-byte 8) (*)))) | |
348 (string (octets-to-string octets :encoding :utf-8b))) | |
349 (is (equalp octets (string-to-octets string :encoding :utf-8b)))))) | |
350 | |
351 ;;; The following tests have been adapted from SBCL's | |
352 ;;; tests/octets.pure.lisp file. | |
353 | |
354 (deftest ensure-roundtrip-ascii () | |
355 (let ((octets (make-ub8-vector 128))) | |
356 (dotimes (i 128) | |
357 (setf (aref octets i) i)) | |
358 (let* ((str (octets-to-string octets :encoding :ascii)) | |
359 (oct2 (string-to-octets str :encoding :ascii))) | |
360 (is (= (length octets) (length oct2))) | |
361 (is (every #'= octets oct2))))) | |
362 | |
363 (deftest test-8bit-roundtrip (enc) | |
364 (let ((octets (make-ub8-vector 256))) | |
365 (dotimes (i 256) | |
366 (setf (aref octets i) i)) | |
367 (let* ((str (octets-to-string octets :encoding enc))) | |
368 ;; remove the undefined code-points because they translate | |
369 ;; to #xFFFD and string-to-octets raises an error when | |
370 ;; encoding #xFFFD | |
371 (multiple-value-bind (filtered-str filtered-octets) | |
372 (let ((s (make-array 0 :element-type 'character | |
373 :adjustable t :fill-pointer 0)) | |
374 (o (make-array 0 :element-type '(unsigned-byte 16) | |
375 :adjustable t :fill-pointer 0))) | |
376 (loop for i below 256 | |
377 for c = (aref str i) | |
378 when (/= (char-code c) #xFFFD) | |
379 do (vector-push-extend c s) | |
380 (vector-push-extend (aref octets i) o)) | |
381 (values s o)) | |
382 (let ((oct2 (string-to-octets filtered-str :encoding enc))) | |
383 (is (eql (length filtered-octets) (length oct2))) | |
384 (is (every #'eql filtered-octets oct2))))))) | |
385 | |
386 (defparameter *iso-8859-charsets* | |
387 '(:iso-8859-1 :iso-8859-2 :iso-8859-3 :iso-8859-4 :iso-8859-5 :iso-885… | |
388 :iso-8859-7 :iso-8859-8 :iso-8859-9 :iso-8859-10 :iso-8859-11 :iso-8… | |
389 :iso-8859-14 :iso-8859-15 :iso-8859-16)) | |
390 | |
391 ;;; Don't actually see what comes out, but there shouldn't be any | |
392 ;;; errors. | |
393 (deftest iso-8859-roundtrip-no-checking () | |
394 (loop for enc in *iso-8859-charsets* do (test-8bit-roundtrip enc))) | |
395 | |
396 (deftest ensure-roundtrip-latin () | |
397 (loop for enc in '(:latin1 :latin9) do (test-8bit-roundtrip enc))) | |
398 | |
399 ;;; Latin-9 chars; the previous test checked roundtrip from | |
400 ;;; octets->char and back, now test that the latin-9 characters did in | |
401 ;;; fact appear during that trip. | |
402 (deftest ensure-roundtrip-latin9 () | |
403 (let ((l9c (map 'string #'code-char '(8364 352 353 381 382 338 339 376… | |
404 (is (string= (octets-to-string (string-to-octets l9c :encoding :lati… | |
405 :encoding :latin9) | |
406 l9c)))) | |
407 | |
408 ;; Expected to fail on Lisps that are strict about non-character code | |
409 ;; points. Mark this as an expected failure when Stefil supports such | |
410 ;; a feature. | |
411 #-(or abcl ccl) | |
412 (deftest code-char-nilness () | |
413 (is (loop for i below unicode-char-code-limit | |
414 never (null (code-char i))))) | |
415 | |
416 (deftest test-unicode-roundtrip (enc) | |
417 (let ((string (make-string unicode-char-code-limit))) | |
418 (dotimes (i unicode-char-code-limit) | |
419 (setf (char string i) | |
420 (if (or (<= #xD800 i #xDFFF) | |
421 (<= #xFDD0 i #xFDEF) | |
422 (eql (logand i #xFFFF) #xFFFF) | |
423 (eql (logand i #xFFFF) #xFFFE)) | |
424 #\? ; don't try to encode non-characters. | |
425 (code-char i)))) | |
426 (let ((string2 (octets-to-string | |
427 (string-to-octets string :encoding enc :errorp t) | |
428 :encoding enc :errorp t))) | |
429 (is (eql (length string2) (length string))) | |
430 (is (string= string string2))))) | |
431 | |
432 (deftest ensure-roundtrip.utf8 () | |
433 (test-unicode-roundtrip :utf-8)) | |
434 | |
435 (deftest ensure-roundtrip.utf16 () | |
436 (test-unicode-roundtrip :utf-16)) | |
437 | |
438 (deftest ensure-roundtrip.utf32 () | |
439 (test-unicode-roundtrip :utf-32)) | |
440 | |
441 #+sbcl | |
442 (progn | |
443 (deftest test-encode-against-sbcl (enc) | |
444 (let ((string (make-string unicode-char-code-limit))) | |
445 (dotimes (i unicode-char-code-limit) | |
446 (setf (char string i) (code-char i))) | |
447 (loop for ch across string | |
448 for babel = (string-to-octets (string ch) :encoding enc) | |
449 for sbcl = (sb-ext:string-to-octets (string ch) | |
450 :external-format enc) | |
451 do (is (equalp babel sbcl))))) | |
452 | |
453 ;; not run automatically because it's a bit slow (1114112 assertions) | |
454 (deftest (test-encode-against-sbcl.utf-8 :auto-call nil) () | |
455 (test-encode-against-sbcl :utf-8))) | |
456 | |
457 (deftest non-ascii-bytes () | |
458 (let ((octets (make-array 128 | |
459 :element-type '(unsigned-byte 8) | |
460 :initial-contents (loop for i from 128 below… | |
461 collect i)))) | |
462 (is (string= (octets-to-string octets :encoding :ascii :errorp nil) | |
463 (make-string 128 :initial-element #\Sub))))) | |
464 | |
465 (deftest non-ascii-chars () | |
466 (let ((string (make-array 128 | |
467 :element-type 'character | |
468 :initial-contents (loop for i from 128 below… | |
469 collect (code-char i… | |
470 (is (equalp (string-to-octets string :encoding :ascii :errorp nil) | |
471 (make-array 128 :initial-element (char-code #\Sub)))))) | |
472 | |
473 ;;;; The following UTF-8 decoding tests are adapted from | |
474 ;;;; <http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>. | |
475 | |
476 (deftest utf8-decode-test (octets expected-results expected-errors) | |
477 (let ((string (octets-to-string (coerce octets '(vector (unsigned-byte… | |
478 :encoding :utf-8 :errorp nil))) | |
479 (is (string= expected-results string)) | |
480 (is (= (count #\ufffd string) expected-errors)))) | |
481 | |
482 (deftest utf8-decode-tests (octets expected-results) | |
483 (setf expected-results (coerce expected-results '(simple-array charact… | |
484 (let ((expected-errors (count #\? expected-results)) | |
485 (expected-results (substitute #\ufffd #\? expected-results))) | |
486 (utf8-decode-test octets expected-results expected-errors) | |
487 (utf8-decode-test (concatenate 'vector '(34) octets '(34)) | |
488 (format nil "\"~A\"" expected-results) | |
489 expected-errors))) | |
490 | |
491 (deftest utf8-too-big-characters () | |
492 (utf8-decode-tests #(#xf4 #x90 #x80 #x80) "?") ; #x110000 | |
493 (utf8-decode-tests #(#xf7 #xbf #xbf #xbf) "?") ; #x1fffff | |
494 (utf8-decode-tests #(#xf8 #x88 #x80 #x80 #x80) "?") ; #x200000 | |
495 (utf8-decode-tests #(#xfb #xbf #xbf #xbf #xbf) "?") ; #x3ffffff | |
496 (utf8-decode-tests #(#xfc #x84 #x80 #x80 #x80 #x80) "?") ; #x4000000e | |
497 (utf8-decode-tests #(#xfd #xbf #xbf #xbf #xbf #xbf) "?")) ; #x7fffffff | |
498 | |
499 (deftest utf8-unexpected-continuation-bytes () | |
500 (utf8-decode-tests #(#x80) "?") | |
501 (utf8-decode-tests #(#xbf) "?") | |
502 (utf8-decode-tests #(#x80 #xbf) "??") | |
503 (utf8-decode-tests #(#x80 #xbf #x80) "???") | |
504 (utf8-decode-tests #(#x80 #xbf #x80 #xbf) "????") | |
505 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80) "?????") | |
506 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80 #xbf) "??????") | |
507 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80 #xbf #x80) "???????")) | |
508 | |
509 ;;; All 64 continuation bytes in a row. | |
510 (deftest utf8-continuation-bytes () | |
511 (apply #'utf8-decode-tests | |
512 (loop for i from #x80 to #xbf | |
513 collect i into bytes | |
514 collect #\? into chars | |
515 finally (return (list bytes | |
516 (coerce chars 'string)))))) | |
517 | |
518 (deftest utf8-lonely-start-characters () | |
519 (flet ((lsc (first last) | |
520 (apply #'utf8-decode-tests | |
521 (loop for i from first to last | |
522 nconc (list i 32) into bytes | |
523 nconc (list #\? #\Space) into chars | |
524 finally (return (list bytes (coerce chars 'strin… | |
525 (apply #'utf8-decode-tests | |
526 (loop for i from first to last | |
527 collect i into bytes | |
528 collect #\? into chars | |
529 finally (return | |
530 (list bytes (coerce chars 'string)))))… | |
531 (lsc #xc0 #xdf) ; 2-byte sequence start chars | |
532 (lsc #xe0 #xef) ; 3-byte | |
533 (lsc #xf0 #xf7) ; 4-byte | |
534 (lsc #xf8 #xfb) ; 5-byte | |
535 (lsc #xfc #xfd))) ; 6-byte | |
536 | |
537 ;;; Otherwise incomplete sequences (last continuation byte missing) | |
538 (deftest utf8-incomplete-sequences () | |
539 (utf8-decode-tests #0=#(#xc0) "?") | |
540 (utf8-decode-tests #1=#(#xe0 #x80) "?") | |
541 (utf8-decode-tests #2=#(#xf0 #x80 #x80) "?") | |
542 (utf8-decode-tests #3=#(#xf8 #x80 #x80 #x80) "?") | |
543 (utf8-decode-tests #4=#(#xfc #x80 #x80 #x80 #x80) "?") | |
544 (utf8-decode-tests #5=#(#xdf) "?") | |
545 (utf8-decode-tests #6=#(#xef #xbf) "?") | |
546 (utf8-decode-tests #7=#(#xf7 #xbf #xbf) "?") | |
547 (utf8-decode-tests #8=#(#xfb #xbf #xbf #xbf) "?") | |
548 (utf8-decode-tests #9=#(#xfd #xbf #xbf #xbf #xbf) "?") | |
549 ;; All ten previous tests concatenated | |
550 (utf8-decode-tests (concatenate 'vector | |
551 #0# #1# #2# #3# #4# #5# #6# #7# #8# #9… | |
552 "??????????")) | |
553 | |
554 (deftest utf8-random-impossible-bytes () | |
555 (utf8-decode-tests #(#xfe) "?") | |
556 (utf8-decode-tests #(#xff) "?") | |
557 (utf8-decode-tests #(#xfe #xfe #xff #xff) "????")) | |
558 | |
559 (deftest utf8-overlong-sequences-/ () | |
560 (utf8-decode-tests #(#xc0 #xaf) "?") | |
561 (utf8-decode-tests #(#xe0 #x80 #xaf) "?") | |
562 (utf8-decode-tests #(#xf0 #x80 #x80 #xaf) "?") | |
563 (utf8-decode-tests #(#xf8 #x80 #x80 #x80 #xaf) "?") | |
564 (utf8-decode-tests #(#xfc #x80 #x80 #x80 #x80 #xaf) "?")) | |
565 | |
566 (deftest utf8-overlong-sequences-rubout () | |
567 (utf8-decode-tests #(#xc1 #xbf) "?") | |
568 (utf8-decode-tests #(#xe0 #x9f #xbf) "?") | |
569 (utf8-decode-tests #(#xf0 #x8f #xbf #xbf) "?") | |
570 (utf8-decode-tests #(#xf8 #x87 #xbf #xbf #xbf) "?") | |
571 (utf8-decode-tests #(#xfc #x83 #xbf #xbf #xbf #xbf) "?")) | |
572 | |
573 (deftest utf8-overlong-sequences-null () | |
574 (utf8-decode-tests #(#xc0 #x80) "?") | |
575 (utf8-decode-tests #(#xe0 #x80 #x80) "?") | |
576 (utf8-decode-tests #(#xf0 #x80 #x80 #x80) "?") | |
577 (utf8-decode-tests #(#xf8 #x80 #x80 #x80 #x80) "?") | |
578 (utf8-decode-tests #(#xfc #x80 #x80 #x80 #x80 #x80) "?")) | |
579 | |
580 ;;;; End of adapted SBCL tests. | |
581 | |
582 ;;; Expected to fail, for now. | |
583 #+#:ignore | |
584 (deftest utf8-illegal-code-positions () | |
585 ;; single UTF-16 surrogates | |
586 (utf8-decode-tests #(#xed #xa0 #x80) "?") | |
587 (utf8-decode-tests #(#xed #xad #xbf) "?") | |
588 (utf8-decode-tests #(#xed #xae #x80) "?") | |
589 (utf8-decode-tests #(#xed #xaf #xbf) "?") | |
590 (utf8-decode-tests #(#xed #xb0 #x80) "?") | |
591 (utf8-decode-tests #(#xed #xbe #x80) "?") | |
592 (utf8-decode-tests #(#xed #xbf #xbf) "?") | |
593 ;; paired UTF-16 surrogates | |
594 (utf8-decode-tests #(ed a0 80 ed b0 80) "??") | |
595 (utf8-decode-tests #(ed a0 80 ed bf bf) "??") | |
596 (utf8-decode-tests #(ed ad bf ed b0 80) "??") | |
597 (utf8-decode-tests #(ed ad bf ed bf bf) "??") | |
598 (utf8-decode-tests #(ed ae 80 ed b0 80) "??") | |
599 (utf8-decode-tests #(ed ae 80 ed bf bf) "??") | |
600 (utf8-decode-tests #(ed af bf ed b0 80) "??") | |
601 (utf8-decode-tests #(ed af bf ed bf bf) "??") | |
602 ;; other illegal code positions | |
603 (utf8-decode-tests #(#xef #xbf #xbe) "?") ; #\uFFFE | |
604 (utf8-decode-tests #(#xef #xbf #xbf) "?")) ; #\uFFFF | |
605 | |
606 ;;; A list of the ISO-8859 encodings where each element is a cons with | |
607 ;;; the car being a keyword denoting the encoding and the cdr being a | |
608 ;;; vector enumerating the corresponding character codes. | |
609 ;;; | |
610 ;;; It was auto-generated from files which can be found at | |
611 ;;; <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/>. | |
612 ;;; | |
613 ;;; Taken from flexi-streams. | |
614 (defparameter *iso-8859-tables* | |
615 '((:iso-8859-1 . | |
616 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
617 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
618 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
619 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
620 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
621 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
622 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
623 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 1… | |
624 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 1… | |
625 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
626 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 2… | |
627 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 2… | |
628 243 244 245 246 247 248 249 250 251 252 253 254 255)) | |
629 | |
630 (:iso-8859-2 . | |
631 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
632 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
633 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
634 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
635 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
636 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
637 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
638 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 3… | |
639 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 3… | |
640 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 2… | |
641 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 2… | |
642 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 3… | |
643 243 244 337 246 247 345 367 250 369 252 253 355 729)) | |
644 | |
645 (:iso-8859-3 . | |
646 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
647 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
648 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
649 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
650 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
651 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
652 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
653 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168… | |
654 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305… | |
655 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 2… | |
656 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219… | |
657 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236… | |
658 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365… | |
659 729)) | |
660 | |
661 (:iso-8859-4 . | |
662 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
663 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
664 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
665 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
666 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
667 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
668 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
669 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 3… | |
670 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 2… | |
671 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 2… | |
672 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 2… | |
673 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 3… | |
674 311 244 245 246 247 248 371 250 251 252 361 363 729)) | |
675 | |
676 (:iso-8859-5 . | |
677 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
678 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
679 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
680 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
681 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
682 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
683 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
684 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 103… | |
685 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1… | |
686 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 … | |
687 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 … | |
688 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 … | |
689 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 … | |
690 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 … | |
691 167 1118 1119)) | |
692 | |
693 (:iso-8859-6 . | |
694 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
695 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
696 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
697 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
698 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
699 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
700 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
701 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533… | |
702 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65… | |
703 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 … | |
704 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580… | |
705 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 … | |
706 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1… | |
707 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 655… | |
708 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) | |
709 | |
710 (:iso-8859-7 . | |
711 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
712 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
713 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
714 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
715 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
716 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
717 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
718 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 1… | |
719 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 90… | |
720 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 9… | |
721 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940… | |
722 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 9… | |
723 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533… | |
724 | |
725 (:iso-8859-8 . | |
726 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
727 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
728 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
729 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
730 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
731 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
732 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
733 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168… | |
734 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 2… | |
735 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533… | |
736 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533… | |
737 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 … | |
738 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 … | |
739 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533… | |
740 8206 8207 65533)) | |
741 | |
742 (:iso-8859-9 . | |
743 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
744 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
745 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
746 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
747 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
748 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
749 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
750 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 1… | |
751 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 1… | |
752 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
753 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 2… | |
754 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 2… | |
755 243 244 245 246 247 248 249 250 251 252 305 351 255)) | |
756 | |
757 (:iso-8859-10 . | |
758 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
759 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
760 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
761 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
762 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
763 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
764 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
765 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 2… | |
766 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 3… | |
767 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 … | |
768 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 2… | |
769 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 3… | |
770 243 244 245 246 361 248 371 250 251 252 253 254 312)) | |
771 | |
772 (:iso-8859-11 . | |
773 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
774 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
775 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
776 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
777 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
778 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
779 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
780 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 359… | |
781 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 … | |
782 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 … | |
783 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 … | |
784 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3… | |
785 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 … | |
786 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 … | |
787 65533 65533 65533 65533)) | |
788 | |
789 (:iso-8859-13 . | |
790 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
791 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
792 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
793 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
794 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
795 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
796 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
797 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216… | |
798 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 … | |
799 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 2… | |
800 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 3… | |
801 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 3… | |
802 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) | |
803 | |
804 (:iso-8859-14 . | |
805 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
806 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
807 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
808 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
809 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
810 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
811 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
812 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 78… | |
813 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7… | |
814 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 19… | |
815 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 … | |
816 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 2… | |
817 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 … | |
818 255)) | |
819 | |
820 (:iso-8859-15 . | |
821 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
822 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
823 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
824 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
825 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
826 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
827 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
828 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 … | |
829 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 1… | |
830 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
831 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 2… | |
832 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 2… | |
833 243 244 245 246 247 248 249 250 251 252 253 254 255)) | |
834 | |
835 (:iso-8859-16 . | |
836 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
837 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
838 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
839 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
840 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
841 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
842 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
843 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353… | |
844 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 … | |
845 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 2… | |
846 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 5… | |
847 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 2… | |
848 242 243 244 337 246 347 369 249 250 251 252 281 539 255)))) | |
849 | |
850 (deftest iso-8859-decode-check () | |
851 (loop for enc in *iso-8859-charsets* | |
852 for octets = (let ((octets (make-ub8-vector 256))) | |
853 (dotimes (i 256 octets) | |
854 (setf (aref octets i) i))) | |
855 for string = (octets-to-string octets :encoding enc) | |
856 do (is (equalp (map 'vector #'char-code string) | |
857 (cdr (assoc enc *iso-8859-tables*)))))) | |
858 | |
859 (deftest character-out-of-range.utf-32 () | |
860 (signals character-out-of-range | |
861 (octets-to-string (ub8v 0 0 #xfe #xff 0 #x11 0 0) | |
862 :encoding :utf-32 :errorp t))) | |
863 | |
864 ;;; RT: encoders and decoders were returning bogus values. | |
865 (deftest encoder/decoder-retvals (encoding &optional (test-string (coerc… | |
866 (let* ((mapping (lookup-mapping babel::*string-vector-mappings* encodi… | |
867 (strlen (length test-string)) | |
868 ;; encoding | |
869 (octet-precount (funcall (octet-counter mapping) | |
870 test-string 0 strlen -1)) | |
871 (array (make-array octet-precount :element-type '(unsigned-byte… | |
872 (encoded-octet-count (funcall (encoder mapping) | |
873 test-string 0 strlen array 0)) | |
874 ;; decoding | |
875 (string (make-string strlen)) | |
876 (char-precount (funcall (code-point-counter mapping) | |
877 array 0 octet-precount -1)) | |
878 (char-count (funcall (decoder mapping) | |
879 array 0 octet-precount string 0))) | |
880 (is (= octet-precount encoded-octet-count)) | |
881 (is (= char-precount char-count)) | |
882 (is (string= test-string string)))) | |
883 | |
884 (deftest encoder-and-decoder-return-values () | |
885 (mapcar 'encoder/decoder-retvals | |
886 (remove-if 'ambiguous-encoding-p | |
887 (list-character-encodings)))) | |
888 | |
889 (deftest code-point-sweep (encoding) | |
890 (finishes | |
891 (dotimes (i char-code-limit) | |
892 (let ((char (ignore-errors (code-char i)))) | |
893 (when char | |
894 (ignore-some-conditions (character-encoding-error) | |
895 (string-to-octets (string char) :encoding encoding))))))) | |
896 | |
897 #+enable-slow-babel-tests | |
898 (deftest code-point-sweep-all-encodings () | |
899 (mapc #'code-point-sweep (list-character-encodings))) | |
900 | |
901 (deftest octet-sweep (encoding) | |
902 (finishes | |
903 (loop for b1 upto #xff do | |
904 (loop for b2 upto #xff do | |
905 (loop for b3 upto #xff do | |
906 (loop for b4 upto #xff do | |
907 (ignore-some-conditions (character-decoding-error) | |
908 (octets-to-string (ub8v b1 b2 b3 b4) :encoding encoding)))… | |
909 | |
910 #+enable-slow-babel-tests | |
911 (deftest octet-sweep-all-encodings () | |
912 (mapc #'octet-sweep (list-character-encodings))) |