| tlength.lisp - clic - Clic is an command line interactive client for gopher wri… | |
| git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/ | |
| Log | |
| Files | |
| Refs | |
| Tags | |
| LICENSE | |
| --- | |
| tlength.lisp (17499B) | |
| --- | |
| 1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 1… | |
| 2 ;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.6 2008/05/2… | |
| 3 | |
| 4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. | |
| 5 | |
| 6 ;;; Redistribution and use in source and binary forms, with or without | |
| 7 ;;; modification, are permitted provided that the following conditions | |
| 8 ;;; are met: | |
| 9 | |
| 10 ;;; * Redistributions of source code must retain the above copyright | |
| 11 ;;; notice, this list of conditions and the following disclaimer. | |
| 12 | |
| 13 ;;; * Redistributions in binary form must reproduce the above | |
| 14 ;;; copyright notice, this list of conditions and the following | |
| 15 ;;; disclaimer in the documentation and/or other materials | |
| 16 ;;; provided with the distribution. | |
| 17 | |
| 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED | |
| 19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| 22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | |
| 24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
| 26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 | |
| 30 (in-package :flexi-streams) | |
| 31 | |
| 32 (defgeneric encoding-factor (format) | |
| 33 (:documentation "Given an external format FORMAT, returns a factor | |
| 34 which denotes the octets to characters ratio to expect when | |
| 35 encoding/decoding. If the returned value is an integer, the factor is | |
| 36 assumed to be exact. If it is a \(double) float, the factor is | |
| 37 supposed to be based on heuristics and usually not exact. | |
| 38 | |
| 39 This factor is used in string.lisp.") | |
| 40 (declare #.*standard-optimize-settings*)) | |
| 41 | |
| 42 (defmethod encoding-factor ((format flexi-8-bit-format)) | |
| 43 (declare #.*standard-optimize-settings*) | |
| 44 ;; 8-bit encodings map octets to characters in an exact one-to-one | |
| 45 ;; fashion | |
| 46 1) | |
| 47 | |
| 48 (defmethod encoding-factor ((format flexi-utf-8-format)) | |
| 49 (declare #.*standard-optimize-settings*) | |
| 50 ;; UTF-8 characters can be anything from one to six octets, but we | |
| 51 ;; assume that the "overhead" is only about 5 percent - this | |
| 52 ;; estimate is obviously very much dependant on the content | |
| 53 1.05d0) | |
| 54 | |
| 55 (defmethod encoding-factor ((format flexi-utf-16-format)) | |
| 56 (declare #.*standard-optimize-settings*) | |
| 57 ;; usually one character maps to two octets, but characters with | |
| 58 ;; code points above #x10000 map to four octets - we assume that we | |
| 59 ;; usually don't see these characters but of course have to return a | |
| 60 ;; float | |
| 61 2.0d0) | |
| 62 | |
| 63 (defmethod encoding-factor ((format flexi-utf-32-format)) | |
| 64 (declare #.*standard-optimize-settings*) | |
| 65 ;; UTF-32 always matches every character to four octets | |
| 66 4) | |
| 67 | |
| 68 (defmethod encoding-factor ((format flexi-crlf-mixin)) | |
| 69 (declare #.*standard-optimize-settings*) | |
| 70 ;; if the sequence #\Return #\Linefeed is the line-end marker, this | |
| 71 ;; obviously makes encodings potentially longer and definitely makes | |
| 72 ;; the estimate unexact | |
| 73 (* 1.02d0 (call-next-method))) | |
| 74 | |
| 75 (defgeneric check-end (format start end i) | |
| 76 (declare #.*fixnum-optimize-settings*) | |
| 77 (:documentation "Helper function used below to determine if we tried | |
| 78 to read past the end of the sequence.") | |
| 79 (:method (format start end i) | |
| 80 (declare #.*fixnum-optimize-settings*) | |
| 81 (declare (ignore start)) | |
| 82 (declare (fixnum end i)) | |
| 83 (when (> i end) | |
| 84 (signal-encoding-error format "This sequence can't be decoded ~ | |
| 85 using ~A as it is too short. ~A octet~:P missing at the end." | |
| 86 (external-format-name format) | |
| 87 (- i end)))) | |
| 88 (:method ((format flexi-utf-16-format) start end i) | |
| 89 (declare #.*fixnum-optimize-settings*) | |
| 90 (declare (fixnum start end i)) | |
| 91 (declare (ignore i)) | |
| 92 ;; don't warn twice | |
| 93 (when (evenp (- end start)) | |
| 94 (call-next-method)))) | |
| 95 | |
| 96 (defgeneric compute-number-of-chars (format sequence start end) | |
| 97 (declare #.*standard-optimize-settings*) | |
| 98 (:documentation "Computes the exact number of characters required to | |
| 99 decode the sequence of octets in SEQUENCE from START to END using the | |
| 100 external format FORMAT.")) | |
| 101 | |
| 102 (defmethod compute-number-of-chars :around (format (list list) start end) | |
| 103 (declare #.*standard-optimize-settings*) | |
| 104 (call-next-method format (coerce list 'vector) start end)) | |
| 105 | |
| 106 (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence… | |
| 107 (declare #.*fixnum-optimize-settings*) | |
| 108 (declare (fixnum start end)) | |
| 109 (declare (ignore sequence)) | |
| 110 (- end start)) | |
| 111 | |
| 112 (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence s… | |
| 113 ;; this method only applies to the 8-bit formats as all other | |
| 114 ;; formats with CRLF line endings have their own specialized methods | |
| 115 ;; below | |
| 116 (declare #.*fixnum-optimize-settings*) | |
| 117 (declare (fixnum start end) (vector sequence)) | |
| 118 (let ((i start) | |
| 119 (length (- end start))) | |
| 120 (declare (fixnum i length)) | |
| 121 (loop | |
| 122 (when (>= i end) | |
| 123 (return)) | |
| 124 (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :en… | |
| 125 (unless position | |
| 126 (return)) | |
| 127 (setq i (1+ position)) | |
| 128 (decf length))) | |
| 129 length)) | |
| 130 | |
| 131 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence… | |
| 132 (declare #.*fixnum-optimize-settings*) | |
| 133 (declare (fixnum start end) (vector sequence)) | |
| 134 (let ((sum 0) | |
| 135 (i start)) | |
| 136 (declare (fixnum i sum)) | |
| 137 (loop | |
| 138 (when (>= i end) | |
| 139 (return)) | |
| 140 (let* ((octet (aref sequence i)) | |
| 141 ;; note that there are no validity checks here | |
| 142 (length (cond ((not (logbitp 7 octet)) 1) | |
| 143 ((= #b11000000 (logand* octet #b11100000)) 2) | |
| 144 ((= #b11100000 (logand* octet #b11110000)) 3) | |
| 145 (t 4)))) | |
| 146 (declare (fixnum length) (type octet octet)) | |
| 147 (incf sum) | |
| 148 (incf i length))) | |
| 149 (check-end format start end i) | |
| 150 sum)) | |
| 151 | |
| 152 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) seq… | |
| 153 (declare #.*fixnum-optimize-settings*) | |
| 154 (declare (fixnum start end) (vector sequence)) | |
| 155 (let ((sum 0) | |
| 156 (i start) | |
| 157 (last-octet 0)) | |
| 158 (declare (fixnum i sum) (type octet last-octet)) | |
| 159 (loop | |
| 160 (when (>= i end) | |
| 161 (return)) | |
| 162 (let* ((octet (aref sequence i)) | |
| 163 ;; note that there are no validity checks here | |
| 164 (length (cond ((not (logbitp 7 octet)) 1) | |
| 165 ((= #b11000000 (logand* octet #b11100000)) 2) | |
| 166 ((= #b11100000 (logand* octet #b11110000)) 3) | |
| 167 (t 4)))) | |
| 168 (declare (fixnum length) (type octet octet)) | |
| 169 (unless (and (= octet +lf+) (= last-octet +cr+)) | |
| 170 (incf sum)) | |
| 171 (incf i length) | |
| 172 (setq last-octet octet))) | |
| 173 (check-end format start end i) | |
| 174 sum)) | |
| 175 | |
| 176 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format)… | |
| 177 (declare #.*fixnum-optimize-settings*) | |
| 178 (declare (fixnum start end) (vector sequence)) | |
| 179 (declare (ignore sequence)) | |
| 180 (when (oddp (- end start)) | |
| 181 (signal-encoding-error format "~A octet~:P cannot be decoded ~ | |
| 182 using UTF-16 as ~:*~A is not even." | |
| 183 (- end start)))) | |
| 184 | |
| 185 (defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequ… | |
| 186 (declare #.*fixnum-optimize-settings*) | |
| 187 (declare (fixnum start end)) | |
| 188 (let ((sum 0) | |
| 189 (i start)) | |
| 190 (declare (fixnum i sum)) | |
| 191 (decf end 2) | |
| 192 (loop | |
| 193 (when (> i end) | |
| 194 (return)) | |
| 195 (let* ((high-octet (aref sequence (1+ i))) | |
| 196 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
| 197 (t 2)))) | |
| 198 (declare (fixnum length) (type octet high-octet)) | |
| 199 (incf sum) | |
| 200 (incf i length))) | |
| 201 (check-end format start (+ end 2) i) | |
| 202 sum)) | |
| 203 | |
| 204 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequ… | |
| 205 (declare #.*fixnum-optimize-settings*) | |
| 206 (declare (fixnum start end) (vector sequence)) | |
| 207 (let ((sum 0) | |
| 208 (i start)) | |
| 209 (declare (fixnum i sum)) | |
| 210 (decf end 2) | |
| 211 (loop | |
| 212 (when (> i end) | |
| 213 (return)) | |
| 214 (let* ((high-octet (aref sequence i)) | |
| 215 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
| 216 (t 2)))) | |
| 217 (declare (fixnum length) (type octet high-octet)) | |
| 218 (incf sum) | |
| 219 (incf i length))) | |
| 220 (check-end format start (+ end 2) i) | |
| 221 sum)) | |
| 222 | |
| 223 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format)… | |
| 224 (declare #.*fixnum-optimize-settings*) | |
| 225 (declare (fixnum start end) (vector sequence)) | |
| 226 (let ((sum 0) | |
| 227 (i start) | |
| 228 (last-octet 0)) | |
| 229 (declare (fixnum i sum) (type octet last-octet)) | |
| 230 (decf end 2) | |
| 231 (loop | |
| 232 (when (> i end) | |
| 233 (return)) | |
| 234 (let* ((high-octet (aref sequence (1+ i))) | |
| 235 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
| 236 (t 2)))) | |
| 237 (declare (fixnum length) (type octet high-octet)) | |
| 238 (unless (and (zerop high-octet) | |
| 239 (= (the octet (aref sequence i)) +lf+) | |
| 240 (= last-octet +cr+)) | |
| 241 (incf sum)) | |
| 242 (setq last-octet (if (zerop high-octet) | |
| 243 (aref sequence i) | |
| 244 0)) | |
| 245 (incf i length))) | |
| 246 (check-end format start (+ end 2) i) | |
| 247 sum)) | |
| 248 | |
| 249 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format)… | |
| 250 (declare #.*fixnum-optimize-settings*) | |
| 251 (declare (fixnum start end) (vector sequence)) | |
| 252 (let ((sum 0) | |
| 253 (i start) | |
| 254 (last-octet 0)) | |
| 255 (declare (fixnum i sum) (type octet last-octet)) | |
| 256 (decf end 2) | |
| 257 (loop | |
| 258 (when (> i end) | |
| 259 (return)) | |
| 260 (let* ((high-octet (aref sequence i)) | |
| 261 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
| 262 (t 2)))) | |
| 263 (declare (fixnum length) (type octet high-octet)) | |
| 264 (unless (and (zerop high-octet) | |
| 265 (= (the octet (aref sequence (1+ i))) +lf+) | |
| 266 (= last-octet +cr+)) | |
| 267 (incf sum)) | |
| 268 (setq last-octet (if (zerop high-octet) | |
| 269 (aref sequence (1+ i)) | |
| 270 0)) | |
| 271 (incf i length))) | |
| 272 (check-end format start (+ end 2) i) | |
| 273 sum)) | |
| 274 | |
| 275 (defmethod compute-number-of-chars :before ((format flexi-utf-32-format)… | |
| 276 (declare #.*fixnum-optimize-settings*) | |
| 277 (declare (fixnum start end)) | |
| 278 (declare (ignore sequence)) | |
| 279 (let ((length (- end start))) | |
| 280 (when (plusp (mod length 4)) | |
| 281 (signal-encoding-error format "~A octet~:P cannot be decoded ~ | |
| 282 using UTF-32 as ~:*~A is not a multiple-value of four." | |
| 283 length)))) | |
| 284 | |
| 285 (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequenc… | |
| 286 (declare #.*fixnum-optimize-settings*) | |
| 287 (declare (fixnum start end)) | |
| 288 (declare (ignore sequence)) | |
| 289 (ceiling (- end start) 4)) | |
| 290 | |
| 291 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format)… | |
| 292 (declare #.*fixnum-optimize-settings*) | |
| 293 (declare (fixnum start end) (vector sequence)) | |
| 294 (let ((i start) | |
| 295 (length (ceiling (- end start) 4))) | |
| 296 (decf end 8) | |
| 297 (loop | |
| 298 (when (> i end) | |
| 299 (return)) | |
| 300 (cond ((loop for j of-type fixnum from i | |
| 301 for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) | |
| 302 always (= octet (aref sequence j))) | |
| 303 (decf length) | |
| 304 (incf i 8)) | |
| 305 (t (incf i 4)))) | |
| 306 length)) | |
| 307 | |
| 308 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format)… | |
| 309 (declare #.*fixnum-optimize-settings*) | |
| 310 (declare (fixnum start end) (vector sequence)) | |
| 311 (let ((i start) | |
| 312 (length (ceiling (- end start) 4))) | |
| 313 (decf end 8) | |
| 314 (loop | |
| 315 (when (> i end) | |
| 316 (return)) | |
| 317 (cond ((loop for j of-type fixnum from i | |
| 318 for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) | |
| 319 always (= octet (aref sequence j))) | |
| 320 (decf length) | |
| 321 (incf i 8)) | |
| 322 (t (incf i 4)))) | |
| 323 length)) | |
| 324 | |
| 325 (defgeneric compute-number-of-octets (format sequence start end) | |
| 326 (declare #.*standard-optimize-settings*) | |
| 327 (:documentation "Computes the exact number of octets required to | |
| 328 encode the sequence of characters in SEQUENCE from START to END using | |
| 329 the external format FORMAT.")) | |
| 330 | |
| 331 (defmethod compute-number-of-octets :around (format (list list) start en… | |
| 332 (declare #.*standard-optimize-settings*) | |
| 333 (call-next-method format (coerce list 'string*) start end)) | |
| 334 | |
| 335 (defmethod compute-number-of-octets ((format flexi-8-bit-format) string … | |
| 336 (declare #.*fixnum-optimize-settings*) | |
| 337 (declare (fixnum start end)) | |
| 338 (declare (ignore string)) | |
| 339 (- end start)) | |
| 340 | |
| 341 (defmethod compute-number-of-octets ((format flexi-utf-8-format) string … | |
| 342 (declare #.*fixnum-optimize-settings*) | |
| 343 (declare (fixnum start end) (string string)) | |
| 344 (let ((sum 0) | |
| 345 (i start)) | |
| 346 (declare (fixnum i sum)) | |
| 347 (loop | |
| 348 (when (>= i end) | |
| 349 (return)) | |
| 350 (let* ((char-code (char-code (char string i))) | |
| 351 (char-length (cond ((< char-code #x80) 1) | |
| 352 ((< char-code #x800) 2) | |
| 353 ((< char-code #x10000) 3) | |
| 354 (t 4)))) | |
| 355 (declare (fixnum char-length) (type char-code-integer char-code)) | |
| 356 (incf sum char-length) | |
| 357 (incf i))) | |
| 358 sum)) | |
| 359 | |
| 360 (defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) st… | |
| 361 (declare #.*fixnum-optimize-settings*) | |
| 362 (declare (fixnum start end) (string string)) | |
| 363 (let ((sum 0) | |
| 364 (i start)) | |
| 365 (declare (fixnum i sum)) | |
| 366 (loop | |
| 367 (when (>= i end) | |
| 368 (return)) | |
| 369 (let* ((char-code (char-code (char string i))) | |
| 370 (char-length (cond ((= char-code #.(char-code #\Newline)) 2) | |
| 371 ((< char-code #x80) 1) | |
| 372 ((< char-code #x800) 2) | |
| 373 ((< char-code #x10000) 3) | |
| 374 (t 4)))) | |
| 375 (declare (fixnum char-length) (type char-code-integer char-code)) | |
| 376 (incf sum char-length) | |
| 377 (incf i))) | |
| 378 sum)) | |
| 379 | |
| 380 (defmethod compute-number-of-octets ((format flexi-utf-16-format) string… | |
| 381 (declare #.*fixnum-optimize-settings*) | |
| 382 (declare (fixnum start end) (string string)) | |
| 383 (let ((sum 0) | |
| 384 (i start)) | |
| 385 (declare (fixnum i sum)) | |
| 386 (loop | |
| 387 (when (>= i end) | |
| 388 (return)) | |
| 389 (let* ((char-code (char-code (char string i))) | |
| 390 (char-length (cond ((< char-code #x10000) 2) | |
| 391 (t 4)))) | |
| 392 (declare (fixnum char-length) (type char-code-integer char-code)) | |
| 393 (incf sum char-length) | |
| 394 (incf i))) | |
| 395 sum)) | |
| 396 | |
| 397 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format… | |
| 398 (declare #.*fixnum-optimize-settings*) | |
| 399 (declare (fixnum start end) (string string)) | |
| 400 (let ((sum 0) | |
| 401 (i start)) | |
| 402 (declare (fixnum i sum)) | |
| 403 (loop | |
| 404 (when (>= i end) | |
| 405 (return)) | |
| 406 (let* ((char-code (char-code (char string i))) | |
| 407 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) | |
| 408 ((< char-code #x10000) 2) | |
| 409 (t 4)))) | |
| 410 (declare (fixnum char-length) (type char-code-integer char-code)) | |
| 411 (incf sum char-length) | |
| 412 (incf i))) | |
| 413 sum)) | |
| 414 | |
| 415 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format… | |
| 416 (declare #.*fixnum-optimize-settings*) | |
| 417 (declare (fixnum start end) (string string)) | |
| 418 (let ((sum 0) | |
| 419 (i start)) | |
| 420 (declare (fixnum i sum)) | |
| 421 (loop | |
| 422 (when (>= i end) | |
| 423 (return)) | |
| 424 (let* ((char-code (char-code (char string i))) | |
| 425 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) | |
| 426 ((< char-code #x10000) 2) | |
| 427 (t 4)))) | |
| 428 (declare (fixnum char-length) (type char-code-integer char-code)) | |
| 429 (incf sum char-length) | |
| 430 (incf i))) | |
| 431 sum)) | |
| 432 | |
| 433 (defmethod compute-number-of-octets ((format flexi-utf-32-format) string… | |
| 434 (declare #.*fixnum-optimize-settings*) | |
| 435 (declare (fixnum start end)) | |
| 436 (declare (ignore string)) | |
| 437 (* 4 (- end start))) | |
| 438 | |
| 439 (defmethod compute-number-of-octets ((format flexi-crlf-mixin) string st… | |
| 440 (declare #.*fixnum-optimize-settings*) | |
| 441 (declare (fixnum start end) (string string)) | |
| 442 (+ (call-next-method) | |
| 443 (* (case (external-format-name format) | |
| 444 (:utf-32 4) | |
| 445 (otherwise 1)) | |
| 446 (count #\Newline string :start start :end end :test #'char=)))) | |
| 447 | |
| 448 (defgeneric character-length (format char) | |
| 449 (declare #.*fixnum-optimize-settings*) | |
| 450 (:documentation "Returns the number of octets needed to encode the | |
| 451 single character CHAR.") | |
| 452 (:method (format char) | |
| 453 (compute-number-of-octets format (string char) 0 1))) | |
| 454 | |
| 455 (defmethod character-length :around ((format flexi-crlf-mixin) (char (eq… | |
| 456 (declare #.*fixnum-optimize-settings*) | |
| 457 (+ (call-next-method format +cr+) | |
| 458 (call-next-method format +lf+))) | |
| 459 | |
| 460 (defmethod character-length ((format flexi-8-bit-format) char) | |
| 461 (declare #.*fixnum-optimize-settings*) | |
| 462 (declare (ignore char)) | |
| 463 1) | |
| 464 | |
| 465 (defmethod character-length ((format flexi-utf-32-format) char) | |
| 466 (declare #.*fixnum-optimize-settings*) | |
| 467 (declare (ignore char)) | |
| 468 |