| ttests.lisp - clic - Clic is an command line interactive client for gopher writ… | |
| git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/ | |
| Log | |
| Files | |
| Refs | |
| Tags | |
| LICENSE | |
| --- | |
| ttests.lisp (43891B) | |
| --- | |
| 1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*- | |
| 2 ;;; | |
| 3 ;;; tests.lisp --- Unit and regression tests for Babel. | |
| 4 ;;; | |
| 5 ;;; Copyright (C) 2007-2009, Luis Oliveira <[email protected]> | |
| 6 ;;; | |
| 7 ;;; Permission is hereby granted, free of charge, to any person | |
| 8 ;;; obtaining a copy of this software and associated documentation | |
| 9 ;;; files (the "Software"), to deal in the Software without | |
| 10 ;;; restriction, including without limitation the rights to use, copy, | |
| 11 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies | |
| 12 ;;; of the Software, and to permit persons to whom the Software is | |
| 13 ;;; furnished to do so, subject to the following conditions: | |
| 14 ;;; | |
| 15 ;;; The above copyright notice and this permission notice shall be | |
| 16 ;;; included in all copies or substantial portions of the Software. | |
| 17 ;;; | |
| 18 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| 19 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| 20 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
| 21 ;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
| 22 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 23 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 24 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
| 25 ;;; DEALINGS IN THE SOFTWARE. | |
| 26 | |
| 27 (in-package #:cl-user) | |
| 28 (defpackage #:babel-tests | |
| 29 (:use #:common-lisp #:babel #:babel-encodings #:hu.dwim.stefil) | |
| 30 (:import-from #:alexandria #:ignore-some-conditions) | |
| 31 (:export #:run)) | |
| 32 (in-package #:babel-tests) | |
| 33 | |
| 34 (defun indented-format (level stream format-control &rest format-argumen… | |
| 35 (let ((line-prefix (make-string level :initial-element #\Space))) | |
| 36 (let ((output (format nil "~?~%" format-control format-arguments))) | |
| 37 (with-input-from-string (s output) | |
| 38 (loop for line = (read-line s nil nil) until (null line) | |
| 39 do (format stream "~A~A~%" line-prefix line)))))) | |
| 40 | |
| 41 ;; adapted from https://github.com/luismbo/stefil/blob/master/source/sui… | |
| 42 (defun describe-failed-tests (&key (result *last-test-result*) (stream t… | |
| 43 "Prints out a report for RESULT in STREAM. | |
| 44 | |
| 45 RESULT defaults to `*last-test-result*' and STREAM defaults to t" | |
| 46 (let ((descs (hu.dwim.stefil::failure-descriptions-of result))) | |
| 47 (cond ((zerop (length descs)) | |
| 48 (format stream "~&~%[no failures!]")) | |
| 49 (t | |
| 50 (format stream "~&~%Test failures:~%") | |
| 51 (dotimes (i (length descs)) | |
| 52 (let ((desc (aref descs i)) | |
| 53 format-control format-arguments) | |
| 54 ;; XXX: most of Stefil's conditions specialise DESCRIBE-O… | |
| 55 ;; with nice human-readable messages. We should add any m… | |
| 56 ;; ones (like UNEXPECTED-ERROR) and ditch this code. | |
| 57 (etypecase desc | |
| 58 (hu.dwim.stefil::unexpected-error | |
| 59 (let ((c (hu.dwim.stefil::condition-of desc))) | |
| 60 (typecase c | |
| 61 (simple-condition | |
| 62 (setf format-control (simple-condition-format-con… | |
| 63 (setf format-arguments | |
| 64 (simple-condition-format-arguments c))) | |
| 65 (t | |
| 66 (setf format-control "~S" | |
| 67 format-arguments (list c)))))) | |
| 68 (hu.dwim.stefil::failed-assertion | |
| 69 (setf format-control (hu.dwim.stefil::format-control-o… | |
| 70 format-arguments (hu.dwim.stefil::format-argumen… | |
| 71 (hu.dwim.stefil::missing-condition | |
| 72 (setf format-control "~A" | |
| 73 format-arguments (list (with-output-to-string (s… | |
| 74 (describe desc stream))… | |
| 75 (null | |
| 76 (setf format-control "Test succeeded!"))) | |
| 77 (format stream "~%Failure ~A: ~A when running ~S~%~%" | |
| 78 (1+ i) | |
| 79 (type-of desc) | |
| 80 (hu.dwim.stefil::name-of (hu.dwim.stefil::test-of… | |
| 81 (indented-format 4 stream "~?" format-control format-argu… | |
| 82 | |
| 83 (defun run () | |
| 84 (let ((test-run (without-debugging (babel-tests)))) | |
| 85 (print test-run) | |
| 86 (describe-failed-tests :result test-run) | |
| 87 (values (zerop (length (hu.dwim.stefil::failure-descriptions-of test… | |
| 88 test-run))) | |
| 89 | |
| 90 (defsuite* (babel-tests :in root-suite)) | |
| 91 | |
| 92 (defun ub8v (&rest contents) | |
| 93 (make-array (length contents) :element-type '(unsigned-byte 8) | |
| 94 :initial-contents contents)) | |
| 95 | |
| 96 (defun make-ub8-vector (size) | |
| 97 (make-array size :element-type '(unsigned-byte 8) | |
| 98 :initial-element 0)) | |
| 99 | |
| 100 (defmacro returns (form &rest values) | |
| 101 "Asserts, through EQUALP, that FORM returns VALUES." | |
| 102 `(is (equalp (multiple-value-list ,form) (list ,@values)))) | |
| 103 | |
| 104 (defmacro defstest (name form &body return-values) | |
| 105 "Similar to RT's DEFTEST." | |
| 106 `(deftest ,name () | |
| 107 (returns ,form ,@(mapcar (lambda (x) `',x) return-values)))) | |
| 108 | |
| 109 (defun fail (control-string &rest arguments) | |
| 110 (hu.dwim.stefil::record/failure 'hu.dwim.stefil::failed-assertion | |
| 111 :format-control control-string | |
| 112 :format-arguments arguments)) | |
| 113 | |
| 114 (defun expected (expected &key got) | |
| 115 (fail "expected ~A, got ~A instead" expected got)) | |
| 116 | |
| 117 (enable-sharp-backslash-syntax) | |
| 118 | |
| 119 ;;;; Simple tests using ASCII | |
| 120 | |
| 121 (defstest enc.ascii.1 | |
| 122 (string-to-octets "abc" :encoding :ascii) | |
| 123 #(97 98 99)) | |
| 124 | |
| 125 (defstest enc.ascii.2 | |
| 126 (string-to-octets (string #\uED) :encoding :ascii :errorp nil) | |
| 127 #(#x1a)) | |
| 128 | |
| 129 (deftest enc.ascii.3 () | |
| 130 (handler-case | |
| 131 (string-to-octets (string #\uED) :encoding :ascii :errorp t) | |
| 132 (character-encoding-error (c) | |
| 133 (is (eql 0 (character-coding-error-position c))) | |
| 134 (is (eq :ascii (character-coding-error-encoding c))) | |
| 135 (is (eql #xed (character-encoding-error-code c)))) | |
| 136 (:no-error (result) | |
| 137 (expected 'character-encoding-error :got result)))) | |
| 138 | |
| 139 (defstest dec.ascii.1 | |
| 140 (octets-to-string (ub8v 97 98 99) :encoding :ascii) | |
| 141 "abc") | |
| 142 | |
| 143 (deftest dec.ascii.2 () | |
| 144 (handler-case | |
| 145 (octets-to-string (ub8v 97 128 99) :encoding :ascii :errorp t) | |
| 146 (character-decoding-error (c) | |
| 147 (is (equalp #(128) (character-decoding-error-octets c))) | |
| 148 (is (eql 1 (character-coding-error-position c))) | |
| 149 (is (eq :ascii (character-coding-error-encoding c)))) | |
| 150 (:no-error (result) | |
| 151 (expected 'character-decoding-error :got result)))) | |
| 152 | |
| 153 (defstest dec.ascii.3 | |
| 154 (octets-to-string (ub8v 97 255 98 99) :encoding :ascii :errorp nil) | |
| 155 #(#\a #\Sub #\b #\c)) | |
| 156 | |
| 157 (defstest oct-count.ascii.1 | |
| 158 (string-size-in-octets "abc" :encoding :ascii) | |
| 159 3 3) | |
| 160 | |
| 161 (defstest char-count.ascii.1 | |
| 162 (vector-size-in-chars (ub8v 97 98 99) :encoding :ascii) | |
| 163 3 3) | |
| 164 | |
| 165 ;;;; UTF-8 | |
| 166 | |
| 167 (defstest char-count.utf-8.1 | |
| 168 ;; "ni hao" in hanzi with the last octet missing | |
| 169 (vector-size-in-chars (ub8v 228 189 160 229 165) :errorp nil) | |
| 170 2 5) | |
| 171 | |
| 172 (deftest char-count.utf-8.2 () | |
| 173 ;; same as above with the last 2 octets missing | |
| 174 (handler-case | |
| 175 (vector-size-in-chars (ub8v 228 189 160 229) :errorp t) | |
| 176 (end-of-input-in-character (c) | |
| 177 (is (equalp #(229) (character-decoding-error-octets c))) | |
| 178 (is (eql 3 (character-coding-error-position c))) | |
| 179 (is (eq :utf-8 (character-coding-error-encoding c)))) | |
| 180 (:no-error (result) | |
| 181 (expected 'end-of-input-in-character :got result)))) | |
| 182 | |
| 183 ;;; Lispworks bug? | |
| 184 ;; #+lispworks | |
| 185 ;; (pushnew 'dec.utf-8.1 rtest::*expected-failures*) | |
| 186 | |
| 187 (defstest dec.utf-8.1 | |
| 188 (octets-to-string (ub8v 228 189 160 229) :errorp nil) | |
| 189 #(#\u4f60 #\ufffd)) | |
| 190 | |
| 191 (deftest dec.utf-8.2 () | |
| 192 (handler-case | |
| 193 (octets-to-string (ub8v 228 189 160 229) :errorp t) | |
| 194 (end-of-input-in-character (c) | |
| 195 (is (equalp #(229) (character-decoding-error-octets c))) | |
| 196 (is (eql 3 (character-coding-error-position c))) | |
| 197 (is (eq :utf-8 (character-coding-error-encoding c)))) | |
| 198 (:no-error (result) | |
| 199 (expected 'end-of-input-in-character :got result)))) | |
| 200 | |
| 201 ;;;; UTF-16 | |
| 202 | |
| 203 ;;; Test that the BOM is not being counted as a character. | |
| 204 (deftest char-count.utf-16.bom () | |
| 205 (is (eql (vector-size-in-chars (ub8v #xfe #xff #x00 #x55 #x00 #x54 #x0… | |
| 206 :encoding :utf-16) | |
| 207 3)) | |
| 208 (is (eql (vector-size-in-chars (ub8v #xff #xfe #x00 #x55 #x00 #x54 #x0… | |
| 209 :encoding :utf-16) | |
| 210 3))) | |
| 211 | |
| 212 ;;;; UTF-32 | |
| 213 | |
| 214 ;;; RT: check that UTF-32 characters without a BOM are treated as | |
| 215 ;;; little-endian. | |
| 216 (deftest endianness.utf-32.no-bom () | |
| 217 (is (string= "a" (octets-to-string (ub8v 0 0 0 97) :encoding :utf-32))… | |
| 218 | |
| 219 ;;;; MORE TESTS | |
| 220 | |
| 221 (defparameter *standard-characters* | |
| 222 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'()… | |
| 223 | |
| 224 ;;; Testing consistency by encoding and decoding a simple string for | |
| 225 ;;; all character encodings. | |
| 226 (deftest rw-equiv.1 () | |
| 227 (let ((compatible-encodings (remove :ebcdic-international (list-charac… | |
| 228 (dolist (*default-character-encoding* compatible-encodings) | |
| 229 (let ((octets (string-to-octets *standard-characters*))) | |
| 230 (is (string= (octets-to-string octets) *standard-characters*))))… | |
| 231 | |
| 232 ;;; FIXME: assumes little-endianness. Easily fixable when we | |
| 233 ;;; implement the BE and LE variants of :UTF-16. | |
| 234 (deftest concatenate-strings-to-octets-equiv.1 () | |
| 235 (let ((foo (octets-to-string (ub8v 102 195 186 195 186) | |
| 236 :encoding :utf-8)) | |
| 237 (bar (octets-to-string (ub8v 98 195 161 114) | |
| 238 :encoding :utf-8))) | |
| 239 ;; note: FOO and BAR are not ascii | |
| 240 (is (equalp (concatenate-strings-to-octets :utf-8 foo bar) | |
| 241 (ub8v 102 195 186 195 186 98 195 161 114))) | |
| 242 (is (equalp (concatenate-strings-to-octets :utf-16 foo bar) | |
| 243 (ub8v 102 0 250 0 250 0 98 0 225 0 114 0))))) | |
| 244 | |
| 245 ;;;; Testing against files generated by GNU iconv. | |
| 246 | |
| 247 (defun test-file (name type) | |
| 248 (uiop:subpathname (asdf:system-relative-pathname "babel-tests" "tests/… | |
| 249 name :type type)) | |
| 250 | |
| 251 (defun read-test-file (name type) | |
| 252 (with-open-file (in (test-file name type) :element-type '(unsigned-byt… | |
| 253 (let* ((data (loop for byte = (read-byte in nil nil) | |
| 254 until (null byte) collect byte))) | |
| 255 (make-array (length data) :element-type '(unsigned-byte 8) | |
| 256 :initial-contents data)))) | |
| 257 | |
| 258 (deftest test-encoding (enc &optional input-enc-name) | |
| 259 (let* ((*default-character-encoding* enc) | |
| 260 (enc-name (string-downcase (symbol-name enc))) | |
| 261 (utf8-octets (read-test-file enc-name "txt-utf8")) | |
| 262 (foo-octets (read-test-file (or input-enc-name enc-name) "txt")) | |
| 263 (utf8-string (octets-to-string utf8-octets :encoding :utf-8 :er… | |
| 264 (foo-string (octets-to-string foo-octets :errorp t))) | |
| 265 (is (string= utf8-string foo-string)) | |
| 266 (is (= (length foo-string) (vector-size-in-chars foo-octets :errorp … | |
| 267 (unless (member enc '(:utf-16 :utf-32)) | |
| 268 ;; FIXME: skipping UTF-16 and UTF-32 because of the BOMs and | |
| 269 ;; because the input might not be in native-endian order so the | |
| 270 ;; comparison will fail there. | |
| 271 (let ((new-octets (string-to-octets foo-string :errorp t))) | |
| 272 (is (equalp new-octets foo-octets)) | |
| 273 (is (eql (length foo-octets) | |
| 274 (string-size-in-octets foo-string :errorp t))))))) | |
| 275 | |
| 276 (deftest iconv-test () | |
| 277 (dolist (enc '(:ascii :ebcdic-us :utf-8 :utf-16 :utf-32)) | |
| 278 (case enc | |
| 279 (:utf-16 (test-encoding :utf-16 "utf-16-with-le-bom")) | |
| 280 (:utf-32 (test-encoding :utf-32 "utf-32-with-le-bom"))) | |
| 281 (test-encoding enc))) | |
| 282 | |
| 283 ;;; RT: accept encoding objects in LOOKUP-MAPPING etc. | |
| 284 (defstest encoding-objects.1 | |
| 285 (string-to-octets "abc" :encoding (get-character-encoding :ascii)) | |
| 286 #(97 98 99)) | |
| 287 | |
| 288 (defmacro with-sharp-backslash-syntax (&body body) | |
| 289 `(let ((*readtable* (copy-readtable *readtable*))) | |
| 290 (set-sharp-backslash-syntax-in-readtable) | |
| 291 ,@body)) | |
| 292 | |
| 293 (defstest sharp-backslash.1 | |
| 294 (with-sharp-backslash-syntax | |
| 295 (loop for string in '("#\\a" "#\\u" "#\\ued") | |
| 296 collect (char-code (read-from-string string)))) | |
| 297 (97 117 #xed)) | |
| 298 | |
| 299 (deftest sharp-backslash.2 () | |
| 300 (signals reader-error (with-sharp-backslash-syntax | |
| 301 (read-from-string "#\\u12zz")))) | |
| 302 | |
| 303 (deftest test-read-from-string (string object position) | |
| 304 "Test that (read-from-string STRING) returns values OBJECT and POSITIO… | |
| 305 (multiple-value-bind (obj pos) | |
| 306 (read-from-string string) | |
| 307 (is (eql object obj)) | |
| 308 (is (eql position pos)))) | |
| 309 | |
| 310 ;;; RT: our #\ reader didn't honor *READ-SUPPRESS*. | |
| 311 (deftest sharp-backslash.3 () | |
| 312 (with-sharp-backslash-syntax | |
| 313 (let ((*read-suppress* t)) | |
| 314 (test-read-from-string "#\\ujunk" nil 7) | |
| 315 (test-read-from-string "#\\u12zz" nil 7)))) | |
| 316 | |
| 317 ;;; RT: the slow implementation of with-simple-vector was buggy. | |
| 318 (defstest string-to-octets.1 | |
| 319 (code-char (aref (string-to-octets "abc" :start 1 :end 2) 0)) | |
| 320 #\b) | |
| 321 | |
| 322 (defstest simple-base-string.1 | |
| 323 (string-to-octets (coerce "abc" 'base-string) :encoding :ascii) | |
| 324 #(97 98 99)) | |
| 325 | |
| 326 ;;; For now, disable this tests for Lisps that are strict about | |
| 327 ;;; non-character code points. In the future, simply mark them as | |
| 328 ;;; expected failures. | |
| 329 #-(or abcl ccl) | |
| 330 (progn | |
| 331 (defstest utf-8b.1 | |
| 332 (string-to-octets (coerce #(#\a #\b #\udcf0) 'unicode-string) | |
| 333 :encoding :utf-8b) | |
| 334 #(97 98 #xf0)) | |
| 335 | |
| 336 #+#:temporarily-disabled | |
| 337 (defstest utf-8b.2 | |
| 338 (octets-to-string (ub8v 97 98 #xcd) :encoding :utf-8b) | |
| 339 #(#\a #\b #\udccd)) | |
| 340 | |
| 341 (defstest utf-8b.3 | |
| 342 (octets-to-string (ub8v 97 #xf0 #xf1 #xff #x01) :encoding :utf-8b) | |
| 343 #(#\a #\udcf0 #\udcf1 #\udcff #\udc01)) | |
| 344 | |
| 345 (deftest utf-8b.4 () | |
| 346 (let* ((octets (coerce (loop repeat 8192 collect (random (+ #x82))) | |
| 347 '(array (unsigned-byte 8) (*)))) | |
| 348 (string (octets-to-string octets :encoding :utf-8b))) | |
| 349 (is (equalp octets (string-to-octets string :encoding :utf-8b)))))) | |
| 350 | |
| 351 ;;; The following tests have been adapted from SBCL's | |
| 352 ;;; tests/octets.pure.lisp file. | |
| 353 | |
| 354 (deftest ensure-roundtrip-ascii () | |
| 355 (let ((octets (make-ub8-vector 128))) | |
| 356 (dotimes (i 128) | |
| 357 (setf (aref octets i) i)) | |
| 358 (let* ((str (octets-to-string octets :encoding :ascii)) | |
| 359 (oct2 (string-to-octets str :encoding :ascii))) | |
| 360 (is (= (length octets) (length oct2))) | |
| 361 (is (every #'= octets oct2))))) | |
| 362 | |
| 363 (deftest test-8bit-roundtrip (enc) | |
| 364 (let ((octets (make-ub8-vector 256))) | |
| 365 (dotimes (i 256) | |
| 366 (setf (aref octets i) i)) | |
| 367 (let* ((str (octets-to-string octets :encoding enc))) | |
| 368 ;; remove the undefined code-points because they translate | |
| 369 ;; to #xFFFD and string-to-octets raises an error when | |
| 370 ;; encoding #xFFFD | |
| 371 (multiple-value-bind (filtered-str filtered-octets) | |
| 372 (let ((s (make-array 0 :element-type 'character | |
| 373 :adjustable t :fill-pointer 0)) | |
| 374 (o (make-array 0 :element-type '(unsigned-byte 16) | |
| 375 :adjustable t :fill-pointer 0))) | |
| 376 (loop for i below 256 | |
| 377 for c = (aref str i) | |
| 378 when (/= (char-code c) #xFFFD) | |
| 379 do (vector-push-extend c s) | |
| 380 (vector-push-extend (aref octets i) o)) | |
| 381 (values s o)) | |
| 382 (let ((oct2 (string-to-octets filtered-str :encoding enc))) | |
| 383 (is (eql (length filtered-octets) (length oct2))) | |
| 384 (is (every #'eql filtered-octets oct2))))))) | |
| 385 | |
| 386 (defparameter *iso-8859-charsets* | |
| 387 '(:iso-8859-1 :iso-8859-2 :iso-8859-3 :iso-8859-4 :iso-8859-5 :iso-885… | |
| 388 :iso-8859-7 :iso-8859-8 :iso-8859-9 :iso-8859-10 :iso-8859-11 :iso-8… | |
| 389 :iso-8859-14 :iso-8859-15 :iso-8859-16)) | |
| 390 | |
| 391 ;;; Don't actually see what comes out, but there shouldn't be any | |
| 392 ;;; errors. | |
| 393 (deftest iso-8859-roundtrip-no-checking () | |
| 394 (loop for enc in *iso-8859-charsets* do (test-8bit-roundtrip enc))) | |
| 395 | |
| 396 (deftest ensure-roundtrip-latin () | |
| 397 (loop for enc in '(:latin1 :latin9) do (test-8bit-roundtrip enc))) | |
| 398 | |
| 399 ;;; Latin-9 chars; the previous test checked roundtrip from | |
| 400 ;;; octets->char and back, now test that the latin-9 characters did in | |
| 401 ;;; fact appear during that trip. | |
| 402 (deftest ensure-roundtrip-latin9 () | |
| 403 (let ((l9c (map 'string #'code-char '(8364 352 353 381 382 338 339 376… | |
| 404 (is (string= (octets-to-string (string-to-octets l9c :encoding :lati… | |
| 405 :encoding :latin9) | |
| 406 l9c)))) | |
| 407 | |
| 408 ;; Expected to fail on Lisps that are strict about non-character code | |
| 409 ;; points. Mark this as an expected failure when Stefil supports such | |
| 410 ;; a feature. | |
| 411 #-(or abcl ccl) | |
| 412 (deftest code-char-nilness () | |
| 413 (is (loop for i below unicode-char-code-limit | |
| 414 never (null (code-char i))))) | |
| 415 | |
| 416 (deftest test-unicode-roundtrip (enc) | |
| 417 (let ((string (make-string unicode-char-code-limit))) | |
| 418 (dotimes (i unicode-char-code-limit) | |
| 419 (setf (char string i) | |
| 420 (if (or (<= #xD800 i #xDFFF) | |
| 421 (<= #xFDD0 i #xFDEF) | |
| 422 (eql (logand i #xFFFF) #xFFFF) | |
| 423 (eql (logand i #xFFFF) #xFFFE)) | |
| 424 #\? ; don't try to encode non-characters. | |
| 425 (code-char i)))) | |
| 426 (let ((string2 (octets-to-string | |
| 427 (string-to-octets string :encoding enc :errorp t) | |
| 428 :encoding enc :errorp t))) | |
| 429 (is (eql (length string2) (length string))) | |
| 430 (is (string= string string2))))) | |
| 431 | |
| 432 (deftest ensure-roundtrip.utf8 () | |
| 433 (test-unicode-roundtrip :utf-8)) | |
| 434 | |
| 435 (deftest ensure-roundtrip.utf16 () | |
| 436 (test-unicode-roundtrip :utf-16)) | |
| 437 | |
| 438 (deftest ensure-roundtrip.utf32 () | |
| 439 (test-unicode-roundtrip :utf-32)) | |
| 440 | |
| 441 #+sbcl | |
| 442 (progn | |
| 443 (deftest test-encode-against-sbcl (enc) | |
| 444 (let ((string (make-string unicode-char-code-limit))) | |
| 445 (dotimes (i unicode-char-code-limit) | |
| 446 (setf (char string i) (code-char i))) | |
| 447 (loop for ch across string | |
| 448 for babel = (string-to-octets (string ch) :encoding enc) | |
| 449 for sbcl = (sb-ext:string-to-octets (string ch) | |
| 450 :external-format enc) | |
| 451 do (is (equalp babel sbcl))))) | |
| 452 | |
| 453 ;; not run automatically because it's a bit slow (1114112 assertions) | |
| 454 (deftest (test-encode-against-sbcl.utf-8 :auto-call nil) () | |
| 455 (test-encode-against-sbcl :utf-8))) | |
| 456 | |
| 457 (deftest non-ascii-bytes () | |
| 458 (let ((octets (make-array 128 | |
| 459 :element-type '(unsigned-byte 8) | |
| 460 :initial-contents (loop for i from 128 below… | |
| 461 collect i)))) | |
| 462 (is (string= (octets-to-string octets :encoding :ascii :errorp nil) | |
| 463 (make-string 128 :initial-element #\Sub))))) | |
| 464 | |
| 465 (deftest non-ascii-chars () | |
| 466 (let ((string (make-array 128 | |
| 467 :element-type 'character | |
| 468 :initial-contents (loop for i from 128 below… | |
| 469 collect (code-char i… | |
| 470 (is (equalp (string-to-octets string :encoding :ascii :errorp nil) | |
| 471 (make-array 128 :initial-element (char-code #\Sub)))))) | |
| 472 | |
| 473 ;;;; The following UTF-8 decoding tests are adapted from | |
| 474 ;;;; <http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>. | |
| 475 | |
| 476 (deftest utf8-decode-test (octets expected-results expected-errors) | |
| 477 (let ((string (octets-to-string (coerce octets '(vector (unsigned-byte… | |
| 478 :encoding :utf-8 :errorp nil))) | |
| 479 (is (string= expected-results string)) | |
| 480 (is (= (count #\ufffd string) expected-errors)))) | |
| 481 | |
| 482 (deftest utf8-decode-tests (octets expected-results) | |
| 483 (let ((expected-errors (count #\? expected-results)) | |
| 484 (expected-results (substitute #\ufffd #\? expected-results))) | |
| 485 (utf8-decode-test octets expected-results expected-errors) | |
| 486 (utf8-decode-test (concatenate 'vector '(34) octets '(34)) | |
| 487 (format nil "\"~A\"" expected-results) | |
| 488 expected-errors))) | |
| 489 | |
| 490 (deftest utf8-too-big-characters () | |
| 491 (utf8-decode-tests #(#xf4 #x90 #x80 #x80) "?") ; #x110000 | |
| 492 (utf8-decode-tests #(#xf7 #xbf #xbf #xbf) "?") ; #x1fffff | |
| 493 (utf8-decode-tests #(#xf8 #x88 #x80 #x80 #x80) "?") ; #x200000 | |
| 494 (utf8-decode-tests #(#xfb #xbf #xbf #xbf #xbf) "?") ; #x3ffffff | |
| 495 (utf8-decode-tests #(#xfc #x84 #x80 #x80 #x80 #x80) "?") ; #x4000000e | |
| 496 (utf8-decode-tests #(#xfd #xbf #xbf #xbf #xbf #xbf) "?")) ; #x7fffffff | |
| 497 | |
| 498 (deftest utf8-unexpected-continuation-bytes () | |
| 499 (utf8-decode-tests #(#x80) "?") | |
| 500 (utf8-decode-tests #(#xbf) "?") | |
| 501 (utf8-decode-tests #(#x80 #xbf) "??") | |
| 502 (utf8-decode-tests #(#x80 #xbf #x80) "???") | |
| 503 (utf8-decode-tests #(#x80 #xbf #x80 #xbf) "????") | |
| 504 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80) "?????") | |
| 505 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80 #xbf) "??????") | |
| 506 (utf8-decode-tests #(#x80 #xbf #x80 #xbf #x80 #xbf #x80) "???????")) | |
| 507 | |
| 508 ;;; All 64 continuation bytes in a row. | |
| 509 (deftest utf8-continuation-bytes () | |
| 510 (apply #'utf8-decode-tests | |
| 511 (loop for i from #x80 to #xbf | |
| 512 collect i into bytes | |
| 513 collect #\? into chars | |
| 514 finally (return (list bytes | |
| 515 (coerce chars 'string)))))) | |
| 516 | |
| 517 (deftest utf8-lonely-start-characters () | |
| 518 (flet ((lsc (first last) | |
| 519 (apply #'utf8-decode-tests | |
| 520 (loop for i from first to last | |
| 521 nconc (list i 32) into bytes | |
| 522 nconc (list #\? #\Space) into chars | |
| 523 finally (return (list bytes (coerce chars 'strin… | |
| 524 (apply #'utf8-decode-tests | |
| 525 (loop for i from first to last | |
| 526 collect i into bytes | |
| 527 collect #\? into chars | |
| 528 finally (return | |
| 529 (list bytes (coerce chars 'string)))))… | |
| 530 (lsc #xc0 #xdf) ; 2-byte sequence start chars | |
| 531 (lsc #xe0 #xef) ; 3-byte | |
| 532 (lsc #xf0 #xf7) ; 4-byte | |
| 533 (lsc #xf8 #xfb) ; 5-byte | |
| 534 (lsc #xfc #xfd))) ; 6-byte | |
| 535 | |
| 536 ;;; Otherwise incomplete sequences (last continuation byte missing) | |
| 537 (deftest utf8-incomplete-sequences () | |
| 538 (utf8-decode-tests #0=#(#xc0) "?") | |
| 539 (utf8-decode-tests #1=#(#xe0 #x80) "?") | |
| 540 (utf8-decode-tests #2=#(#xf0 #x80 #x80) "?") | |
| 541 (utf8-decode-tests #3=#(#xf8 #x80 #x80 #x80) "?") | |
| 542 (utf8-decode-tests #4=#(#xfc #x80 #x80 #x80 #x80) "?") | |
| 543 (utf8-decode-tests #5=#(#xdf) "?") | |
| 544 (utf8-decode-tests #6=#(#xef #xbf) "?") | |
| 545 (utf8-decode-tests #7=#(#xf7 #xbf #xbf) "?") | |
| 546 (utf8-decode-tests #8=#(#xfb #xbf #xbf #xbf) "?") | |
| 547 (utf8-decode-tests #9=#(#xfd #xbf #xbf #xbf #xbf) "?") | |
| 548 ;; All ten previous tests concatenated | |
| 549 (utf8-decode-tests (concatenate 'vector | |
| 550 #0# #1# #2# #3# #4# #5# #6# #7# #8# #9… | |
| 551 "??????????")) | |
| 552 | |
| 553 (deftest utf8-random-impossible-bytes () | |
| 554 (utf8-decode-tests #(#xfe) "?") | |
| 555 (utf8-decode-tests #(#xff) "?") | |
| 556 (utf8-decode-tests #(#xfe #xfe #xff #xff) "????")) | |
| 557 | |
| 558 (deftest utf8-overlong-sequences-/ () | |
| 559 (utf8-decode-tests #(#xc0 #xaf) "?") | |
| 560 (utf8-decode-tests #(#xe0 #x80 #xaf) "?") | |
| 561 (utf8-decode-tests #(#xf0 #x80 #x80 #xaf) "?") | |
| 562 (utf8-decode-tests #(#xf8 #x80 #x80 #x80 #xaf) "?") | |
| 563 (utf8-decode-tests #(#xfc #x80 #x80 #x80 #x80 #xaf) "?")) | |
| 564 | |
| 565 (deftest utf8-overlong-sequences-rubout () | |
| 566 (utf8-decode-tests #(#xc1 #xbf) "?") | |
| 567 (utf8-decode-tests #(#xe0 #x9f #xbf) "?") | |
| 568 (utf8-decode-tests #(#xf0 #x8f #xbf #xbf) "?") | |
| 569 (utf8-decode-tests #(#xf8 #x87 #xbf #xbf #xbf) "?") | |
| 570 (utf8-decode-tests #(#xfc #x83 #xbf #xbf #xbf #xbf) "?")) | |
| 571 | |
| 572 (deftest utf8-overlong-sequences-null () | |
| 573 (utf8-decode-tests #(#xc0 #x80) "?") | |
| 574 (utf8-decode-tests #(#xe0 #x80 #x80) "?") | |
| 575 (utf8-decode-tests #(#xf0 #x80 #x80 #x80) "?") | |
| 576 (utf8-decode-tests #(#xf8 #x80 #x80 #x80 #x80) "?") | |
| 577 (utf8-decode-tests #(#xfc #x80 #x80 #x80 #x80 #x80) "?")) | |
| 578 | |
| 579 ;;;; End of adapted SBCL tests. | |
| 580 | |
| 581 ;;; Expected to fail, for now. | |
| 582 #+#:ignore | |
| 583 (deftest utf8-illegal-code-positions () | |
| 584 ;; single UTF-16 surrogates | |
| 585 (utf8-decode-tests #(#xed #xa0 #x80) "?") | |
| 586 (utf8-decode-tests #(#xed #xad #xbf) "?") | |
| 587 (utf8-decode-tests #(#xed #xae #x80) "?") | |
| 588 (utf8-decode-tests #(#xed #xaf #xbf) "?") | |
| 589 (utf8-decode-tests #(#xed #xb0 #x80) "?") | |
| 590 (utf8-decode-tests #(#xed #xbe #x80) "?") | |
| 591 (utf8-decode-tests #(#xed #xbf #xbf) "?") | |
| 592 ;; paired UTF-16 surrogates | |
| 593 (utf8-decode-tests #(ed a0 80 ed b0 80) "??") | |
| 594 (utf8-decode-tests #(ed a0 80 ed bf bf) "??") | |
| 595 (utf8-decode-tests #(ed ad bf ed b0 80) "??") | |
| 596 (utf8-decode-tests #(ed ad bf ed bf bf) "??") | |
| 597 (utf8-decode-tests #(ed ae 80 ed b0 80) "??") | |
| 598 (utf8-decode-tests #(ed ae 80 ed bf bf) "??") | |
| 599 (utf8-decode-tests #(ed af bf ed b0 80) "??") | |
| 600 (utf8-decode-tests #(ed af bf ed bf bf) "??") | |
| 601 ;; other illegal code positions | |
| 602 (utf8-decode-tests #(#xef #xbf #xbe) "?") ; #\uFFFE | |
| 603 (utf8-decode-tests #(#xef #xbf #xbf) "?")) ; #\uFFFF | |
| 604 | |
| 605 ;;; A list of the ISO-8859 encodings where each element is a cons with | |
| 606 ;;; the car being a keyword denoting the encoding and the cdr being a | |
| 607 ;;; vector enumerating the corresponding character codes. | |
| 608 ;;; | |
| 609 ;;; It was auto-generated from files which can be found at | |
| 610 ;;; <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/>. | |
| 611 ;;; | |
| 612 ;;; Taken from flexi-streams. | |
| 613 (defparameter *iso-8859-tables* | |
| 614 '((:iso-8859-1 . | |
| 615 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 616 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 617 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 618 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 619 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 620 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 621 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 622 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 1… | |
| 623 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 1… | |
| 624 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
| 625 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 2… | |
| 626 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 2… | |
| 627 243 244 245 246 247 248 249 250 251 252 253 254 255)) | |
| 628 | |
| 629 (:iso-8859-2 . | |
| 630 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 631 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 632 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 633 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 634 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 635 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 636 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 637 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 3… | |
| 638 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 3… | |
| 639 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 2… | |
| 640 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 2… | |
| 641 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 3… | |
| 642 243 244 337 246 247 345 367 250 369 252 253 355 729)) | |
| 643 | |
| 644 (:iso-8859-3 . | |
| 645 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 646 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 647 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 648 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 649 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 650 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 651 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 652 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168… | |
| 653 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305… | |
| 654 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 2… | |
| 655 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219… | |
| 656 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236… | |
| 657 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365… | |
| 658 729)) | |
| 659 | |
| 660 (:iso-8859-4 . | |
| 661 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 662 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 663 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 664 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 665 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 666 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 667 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 668 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 3… | |
| 669 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 2… | |
| 670 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 2… | |
| 671 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 2… | |
| 672 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 3… | |
| 673 311 244 245 246 247 248 371 250 251 252 361 363 729)) | |
| 674 | |
| 675 (:iso-8859-5 . | |
| 676 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 677 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 678 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 679 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 680 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 681 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 682 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 683 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 103… | |
| 684 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1… | |
| 685 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 … | |
| 686 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 … | |
| 687 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 … | |
| 688 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 … | |
| 689 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 … | |
| 690 167 1118 1119)) | |
| 691 | |
| 692 (:iso-8859-6 . | |
| 693 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 694 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 695 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 696 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 697 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 698 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 699 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 700 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533… | |
| 701 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65… | |
| 702 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 … | |
| 703 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580… | |
| 704 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 … | |
| 705 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1… | |
| 706 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 655… | |
| 707 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) | |
| 708 | |
| 709 (:iso-8859-7 . | |
| 710 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 711 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 712 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 713 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 714 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 715 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 716 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 717 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 1… | |
| 718 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 90… | |
| 719 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 9… | |
| 720 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940… | |
| 721 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 9… | |
| 722 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533… | |
| 723 | |
| 724 (:iso-8859-8 . | |
| 725 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 726 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 727 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 728 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 729 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 730 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 731 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 732 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168… | |
| 733 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 2… | |
| 734 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533… | |
| 735 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533… | |
| 736 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 … | |
| 737 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 … | |
| 738 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533… | |
| 739 8206 8207 65533)) | |
| 740 | |
| 741 (:iso-8859-9 . | |
| 742 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 743 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 744 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 745 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 746 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 747 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 748 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 749 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 1… | |
| 750 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 1… | |
| 751 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
| 752 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 2… | |
| 753 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 2… | |
| 754 243 244 245 246 247 248 249 250 251 252 305 351 255)) | |
| 755 | |
| 756 (:iso-8859-10 . | |
| 757 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 758 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 759 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 760 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 761 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 762 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 763 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 764 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 2… | |
| 765 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 3… | |
| 766 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 … | |
| 767 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 2… | |
| 768 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 3… | |
| 769 243 244 245 246 361 248 371 250 251 252 253 254 312)) | |
| 770 | |
| 771 (:iso-8859-11 . | |
| 772 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 773 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 774 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 775 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 776 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 777 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 778 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 779 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 359… | |
| 780 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 … | |
| 781 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 … | |
| 782 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 … | |
| 783 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3… | |
| 784 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 … | |
| 785 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 … | |
| 786 65533 65533 65533 65533)) | |
| 787 | |
| 788 (:iso-8859-13 . | |
| 789 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 790 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 791 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 792 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 793 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 794 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 795 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 796 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216… | |
| 797 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 … | |
| 798 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 2… | |
| 799 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 3… | |
| 800 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 3… | |
| 801 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) | |
| 802 | |
| 803 (:iso-8859-14 . | |
| 804 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 805 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 806 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 807 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 808 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 809 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 810 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 811 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 78… | |
| 812 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7… | |
| 813 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 19… | |
| 814 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 … | |
| 815 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 2… | |
| 816 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 … | |
| 817 255)) | |
| 818 | |
| 819 (:iso-8859-15 . | |
| 820 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 821 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 822 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 823 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 824 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 825 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 826 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 827 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 … | |
| 828 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 1… | |
| 829 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 2… | |
| 830 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 2… | |
| 831 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 2… | |
| 832 243 244 245 246 247 248 249 250 251 252 253 254 255)) | |
| 833 | |
| 834 (:iso-8859-16 . | |
| 835 #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 … | |
| 836 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48… | |
| 837 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72… | |
| 838 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96… | |
| 839 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 11… | |
| 840 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 1… | |
| 841 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 1… | |
| 842 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353… | |
| 843 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 … | |
| 844 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 2… | |
| 845 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 5… | |
| 846 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 2… | |
| 847 242 243 244 337 246 347 369 249 250 251 252 281 539 255)))) | |
| 848 | |
| 849 (deftest iso-8859-decode-check () | |
| 850 (loop for enc in *iso-8859-charsets* | |
| 851 for octets = (let ((octets (make-ub8-vector 256))) | |
| 852 (dotimes (i 256 octets) | |
| 853 (setf (aref octets i) i))) | |
| 854 for string = (octets-to-string octets :encoding enc) | |
| 855 do (is (equalp (map 'vector #'char-code string) | |
| 856 (cdr (assoc enc *iso-8859-tables*)))))) | |
| 857 | |
| 858 (deftest character-out-of-range.utf-32 () | |
| 859 (signals character-out-of-range | |
| 860 (octets-to-string (ub8v 0 0 #xfe #xff 0 #x11 0 0) | |
| 861 :encoding :utf-32 :errorp t))) | |
| 862 | |
| 863 ;;; RT: encoders and decoders were returning bogus values. | |
| 864 (deftest encoder/decoder-retvals (encoding &optional (test-string "abc")) | |
| 865 (let* ((mapping (lookup-mapping babel::*string-vector-mappings* encodi… | |
| 866 (strlen (length test-string)) | |
| 867 ;; encoding | |
| 868 (octet-precount (funcall (octet-counter mapping) | |
| 869 test-string 0 strlen -1)) | |
| 870 (array (make-array octet-precount :element-type '(unsigned-byte… | |
| 871 (encoded-octet-count (funcall (encoder mapping) | |
| 872 test-string 0 strlen array 0)) | |
| 873 ;; decoding | |
| 874 (string (make-string strlen)) | |
| 875 (char-precount (funcall (code-point-counter mapping) | |
| 876 array 0 octet-precount -1)) | |
| 877 (char-count (funcall (decoder mapping) | |
| 878 array 0 octet-precount string 0))) | |
| 879 (is (= octet-precount encoded-octet-count)) | |
| 880 (is (= char-precount char-count)) | |
| 881 (is (string= test-string string)))) | |
| 882 | |
| 883 (deftest encoder-and-decoder-return-values () | |
| 884 (mapcar 'encoder/decoder-retvals | |
| 885 (remove-if 'ambiguous-encoding-p | |
| 886 (list-character-encodings)))) | |
| 887 | |
| 888 (deftest code-point-sweep (encoding) | |
| 889 (finishes | |
| 890 (dotimes (i char-code-limit) | |
| 891 (let ((char (ignore-errors (code-char i)))) | |
| 892 (when char | |
| 893 (ignore-some-conditions (character-encoding-error) | |
| 894 (string-to-octets (string char) :encoding encoding))))))) | |
| 895 | |
| 896 #+enable-slow-babel-tests | |
| 897 (deftest code-point-sweep-all-encodings () | |
| 898 (mapc #'code-point-sweep (list-character-encodings))) | |
| 899 | |
| 900 (deftest octet-sweep (encoding) | |
| 901 (finishes | |
| 902 (loop for b1 upto #xff do | |
| 903 (loop for b2 upto #xff do | |
| 904 (loop for b3 upto #xff do | |
| 905 (loop for b4 upto #xff do | |
| 906 (ignore-some-conditions (character-decoding-error) | |
| 907 (octets-to-string (ub8v b1 b2 b3 b4) :encoding encoding)))… | |
| 908 | |
| 909 #+enable-slow-babel-tests | |
| 910 (deftest octet-sweep-all-encodings () | |
| 911 (mapc #'octet-sweep (list-character-encodings))) |