length.lisp - clic - Clic is an command line interactive client for gopher writ… | |
git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
LICENSE | |
--- | |
length.lisp (17499B) | |
--- | |
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 1… | |
2 ;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.6 2008/05/2… | |
3 | |
4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. | |
5 | |
6 ;;; Redistribution and use in source and binary forms, with or without | |
7 ;;; modification, are permitted provided that the following conditions | |
8 ;;; are met: | |
9 | |
10 ;;; * Redistributions of source code must retain the above copyright | |
11 ;;; notice, this list of conditions and the following disclaimer. | |
12 | |
13 ;;; * Redistributions in binary form must reproduce the above | |
14 ;;; copyright notice, this list of conditions and the following | |
15 ;;; disclaimer in the documentation and/or other materials | |
16 ;;; provided with the distribution. | |
17 | |
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED | |
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | |
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | |
30 (in-package :flexi-streams) | |
31 | |
32 (defgeneric encoding-factor (format) | |
33 (:documentation "Given an external format FORMAT, returns a factor | |
34 which denotes the octets to characters ratio to expect when | |
35 encoding/decoding. If the returned value is an integer, the factor is | |
36 assumed to be exact. If it is a \(double) float, the factor is | |
37 supposed to be based on heuristics and usually not exact. | |
38 | |
39 This factor is used in string.lisp.") | |
40 (declare #.*standard-optimize-settings*)) | |
41 | |
42 (defmethod encoding-factor ((format flexi-8-bit-format)) | |
43 (declare #.*standard-optimize-settings*) | |
44 ;; 8-bit encodings map octets to characters in an exact one-to-one | |
45 ;; fashion | |
46 1) | |
47 | |
48 (defmethod encoding-factor ((format flexi-utf-8-format)) | |
49 (declare #.*standard-optimize-settings*) | |
50 ;; UTF-8 characters can be anything from one to six octets, but we | |
51 ;; assume that the "overhead" is only about 5 percent - this | |
52 ;; estimate is obviously very much dependant on the content | |
53 1.05d0) | |
54 | |
55 (defmethod encoding-factor ((format flexi-utf-16-format)) | |
56 (declare #.*standard-optimize-settings*) | |
57 ;; usually one character maps to two octets, but characters with | |
58 ;; code points above #x10000 map to four octets - we assume that we | |
59 ;; usually don't see these characters but of course have to return a | |
60 ;; float | |
61 2.0d0) | |
62 | |
63 (defmethod encoding-factor ((format flexi-utf-32-format)) | |
64 (declare #.*standard-optimize-settings*) | |
65 ;; UTF-32 always matches every character to four octets | |
66 4) | |
67 | |
68 (defmethod encoding-factor ((format flexi-crlf-mixin)) | |
69 (declare #.*standard-optimize-settings*) | |
70 ;; if the sequence #\Return #\Linefeed is the line-end marker, this | |
71 ;; obviously makes encodings potentially longer and definitely makes | |
72 ;; the estimate unexact | |
73 (* 1.02d0 (call-next-method))) | |
74 | |
75 (defgeneric check-end (format start end i) | |
76 (declare #.*fixnum-optimize-settings*) | |
77 (:documentation "Helper function used below to determine if we tried | |
78 to read past the end of the sequence.") | |
79 (:method (format start end i) | |
80 (declare #.*fixnum-optimize-settings*) | |
81 (declare (ignore start)) | |
82 (declare (fixnum end i)) | |
83 (when (> i end) | |
84 (signal-encoding-error format "This sequence can't be decoded ~ | |
85 using ~A as it is too short. ~A octet~:P missing at the end." | |
86 (external-format-name format) | |
87 (- i end)))) | |
88 (:method ((format flexi-utf-16-format) start end i) | |
89 (declare #.*fixnum-optimize-settings*) | |
90 (declare (fixnum start end i)) | |
91 (declare (ignore i)) | |
92 ;; don't warn twice | |
93 (when (evenp (- end start)) | |
94 (call-next-method)))) | |
95 | |
96 (defgeneric compute-number-of-chars (format sequence start end) | |
97 (declare #.*standard-optimize-settings*) | |
98 (:documentation "Computes the exact number of characters required to | |
99 decode the sequence of octets in SEQUENCE from START to END using the | |
100 external format FORMAT.")) | |
101 | |
102 (defmethod compute-number-of-chars :around (format (list list) start end) | |
103 (declare #.*standard-optimize-settings*) | |
104 (call-next-method format (coerce list 'vector) start end)) | |
105 | |
106 (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence… | |
107 (declare #.*fixnum-optimize-settings*) | |
108 (declare (fixnum start end)) | |
109 (declare (ignore sequence)) | |
110 (- end start)) | |
111 | |
112 (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence s… | |
113 ;; this method only applies to the 8-bit formats as all other | |
114 ;; formats with CRLF line endings have their own specialized methods | |
115 ;; below | |
116 (declare #.*fixnum-optimize-settings*) | |
117 (declare (fixnum start end) (vector sequence)) | |
118 (let ((i start) | |
119 (length (- end start))) | |
120 (declare (fixnum i length)) | |
121 (loop | |
122 (when (>= i end) | |
123 (return)) | |
124 (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :en… | |
125 (unless position | |
126 (return)) | |
127 (setq i (1+ position)) | |
128 (decf length))) | |
129 length)) | |
130 | |
131 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence… | |
132 (declare #.*fixnum-optimize-settings*) | |
133 (declare (fixnum start end) (vector sequence)) | |
134 (let ((sum 0) | |
135 (i start)) | |
136 (declare (fixnum i sum)) | |
137 (loop | |
138 (when (>= i end) | |
139 (return)) | |
140 (let* ((octet (aref sequence i)) | |
141 ;; note that there are no validity checks here | |
142 (length (cond ((not (logbitp 7 octet)) 1) | |
143 ((= #b11000000 (logand* octet #b11100000)) 2) | |
144 ((= #b11100000 (logand* octet #b11110000)) 3) | |
145 (t 4)))) | |
146 (declare (fixnum length) (type octet octet)) | |
147 (incf sum) | |
148 (incf i length))) | |
149 (check-end format start end i) | |
150 sum)) | |
151 | |
152 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) seq… | |
153 (declare #.*fixnum-optimize-settings*) | |
154 (declare (fixnum start end) (vector sequence)) | |
155 (let ((sum 0) | |
156 (i start) | |
157 (last-octet 0)) | |
158 (declare (fixnum i sum) (type octet last-octet)) | |
159 (loop | |
160 (when (>= i end) | |
161 (return)) | |
162 (let* ((octet (aref sequence i)) | |
163 ;; note that there are no validity checks here | |
164 (length (cond ((not (logbitp 7 octet)) 1) | |
165 ((= #b11000000 (logand* octet #b11100000)) 2) | |
166 ((= #b11100000 (logand* octet #b11110000)) 3) | |
167 (t 4)))) | |
168 (declare (fixnum length) (type octet octet)) | |
169 (unless (and (= octet +lf+) (= last-octet +cr+)) | |
170 (incf sum)) | |
171 (incf i length) | |
172 (setq last-octet octet))) | |
173 (check-end format start end i) | |
174 sum)) | |
175 | |
176 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format)… | |
177 (declare #.*fixnum-optimize-settings*) | |
178 (declare (fixnum start end) (vector sequence)) | |
179 (declare (ignore sequence)) | |
180 (when (oddp (- end start)) | |
181 (signal-encoding-error format "~A octet~:P cannot be decoded ~ | |
182 using UTF-16 as ~:*~A is not even." | |
183 (- end start)))) | |
184 | |
185 (defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequ… | |
186 (declare #.*fixnum-optimize-settings*) | |
187 (declare (fixnum start end)) | |
188 (let ((sum 0) | |
189 (i start)) | |
190 (declare (fixnum i sum)) | |
191 (decf end 2) | |
192 (loop | |
193 (when (> i end) | |
194 (return)) | |
195 (let* ((high-octet (aref sequence (1+ i))) | |
196 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
197 (t 2)))) | |
198 (declare (fixnum length) (type octet high-octet)) | |
199 (incf sum) | |
200 (incf i length))) | |
201 (check-end format start (+ end 2) i) | |
202 sum)) | |
203 | |
204 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequ… | |
205 (declare #.*fixnum-optimize-settings*) | |
206 (declare (fixnum start end) (vector sequence)) | |
207 (let ((sum 0) | |
208 (i start)) | |
209 (declare (fixnum i sum)) | |
210 (decf end 2) | |
211 (loop | |
212 (when (> i end) | |
213 (return)) | |
214 (let* ((high-octet (aref sequence i)) | |
215 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
216 (t 2)))) | |
217 (declare (fixnum length) (type octet high-octet)) | |
218 (incf sum) | |
219 (incf i length))) | |
220 (check-end format start (+ end 2) i) | |
221 sum)) | |
222 | |
223 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format)… | |
224 (declare #.*fixnum-optimize-settings*) | |
225 (declare (fixnum start end) (vector sequence)) | |
226 (let ((sum 0) | |
227 (i start) | |
228 (last-octet 0)) | |
229 (declare (fixnum i sum) (type octet last-octet)) | |
230 (decf end 2) | |
231 (loop | |
232 (when (> i end) | |
233 (return)) | |
234 (let* ((high-octet (aref sequence (1+ i))) | |
235 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
236 (t 2)))) | |
237 (declare (fixnum length) (type octet high-octet)) | |
238 (unless (and (zerop high-octet) | |
239 (= (the octet (aref sequence i)) +lf+) | |
240 (= last-octet +cr+)) | |
241 (incf sum)) | |
242 (setq last-octet (if (zerop high-octet) | |
243 (aref sequence i) | |
244 0)) | |
245 (incf i length))) | |
246 (check-end format start (+ end 2) i) | |
247 sum)) | |
248 | |
249 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format)… | |
250 (declare #.*fixnum-optimize-settings*) | |
251 (declare (fixnum start end) (vector sequence)) | |
252 (let ((sum 0) | |
253 (i start) | |
254 (last-octet 0)) | |
255 (declare (fixnum i sum) (type octet last-octet)) | |
256 (decf end 2) | |
257 (loop | |
258 (when (> i end) | |
259 (return)) | |
260 (let* ((high-octet (aref sequence i)) | |
261 (length (cond ((<= #xd8 high-octet #xdf) 4) | |
262 (t 2)))) | |
263 (declare (fixnum length) (type octet high-octet)) | |
264 (unless (and (zerop high-octet) | |
265 (= (the octet (aref sequence (1+ i))) +lf+) | |
266 (= last-octet +cr+)) | |
267 (incf sum)) | |
268 (setq last-octet (if (zerop high-octet) | |
269 (aref sequence (1+ i)) | |
270 0)) | |
271 (incf i length))) | |
272 (check-end format start (+ end 2) i) | |
273 sum)) | |
274 | |
275 (defmethod compute-number-of-chars :before ((format flexi-utf-32-format)… | |
276 (declare #.*fixnum-optimize-settings*) | |
277 (declare (fixnum start end)) | |
278 (declare (ignore sequence)) | |
279 (let ((length (- end start))) | |
280 (when (plusp (mod length 4)) | |
281 (signal-encoding-error format "~A octet~:P cannot be decoded ~ | |
282 using UTF-32 as ~:*~A is not a multiple-value of four." | |
283 length)))) | |
284 | |
285 (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequenc… | |
286 (declare #.*fixnum-optimize-settings*) | |
287 (declare (fixnum start end)) | |
288 (declare (ignore sequence)) | |
289 (ceiling (- end start) 4)) | |
290 | |
291 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format)… | |
292 (declare #.*fixnum-optimize-settings*) | |
293 (declare (fixnum start end) (vector sequence)) | |
294 (let ((i start) | |
295 (length (ceiling (- end start) 4))) | |
296 (decf end 8) | |
297 (loop | |
298 (when (> i end) | |
299 (return)) | |
300 (cond ((loop for j of-type fixnum from i | |
301 for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) | |
302 always (= octet (aref sequence j))) | |
303 (decf length) | |
304 (incf i 8)) | |
305 (t (incf i 4)))) | |
306 length)) | |
307 | |
308 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format)… | |
309 (declare #.*fixnum-optimize-settings*) | |
310 (declare (fixnum start end) (vector sequence)) | |
311 (let ((i start) | |
312 (length (ceiling (- end start) 4))) | |
313 (decf end 8) | |
314 (loop | |
315 (when (> i end) | |
316 (return)) | |
317 (cond ((loop for j of-type fixnum from i | |
318 for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) | |
319 always (= octet (aref sequence j))) | |
320 (decf length) | |
321 (incf i 8)) | |
322 (t (incf i 4)))) | |
323 length)) | |
324 | |
325 (defgeneric compute-number-of-octets (format sequence start end) | |
326 (declare #.*standard-optimize-settings*) | |
327 (:documentation "Computes the exact number of octets required to | |
328 encode the sequence of characters in SEQUENCE from START to END using | |
329 the external format FORMAT.")) | |
330 | |
331 (defmethod compute-number-of-octets :around (format (list list) start en… | |
332 (declare #.*standard-optimize-settings*) | |
333 (call-next-method format (coerce list 'string*) start end)) | |
334 | |
335 (defmethod compute-number-of-octets ((format flexi-8-bit-format) string … | |
336 (declare #.*fixnum-optimize-settings*) | |
337 (declare (fixnum start end)) | |
338 (declare (ignore string)) | |
339 (- end start)) | |
340 | |
341 (defmethod compute-number-of-octets ((format flexi-utf-8-format) string … | |
342 (declare #.*fixnum-optimize-settings*) | |
343 (declare (fixnum start end) (string string)) | |
344 (let ((sum 0) | |
345 (i start)) | |
346 (declare (fixnum i sum)) | |
347 (loop | |
348 (when (>= i end) | |
349 (return)) | |
350 (let* ((char-code (char-code (char string i))) | |
351 (char-length (cond ((< char-code #x80) 1) | |
352 ((< char-code #x800) 2) | |
353 ((< char-code #x10000) 3) | |
354 (t 4)))) | |
355 (declare (fixnum char-length) (type char-code-integer char-code)) | |
356 (incf sum char-length) | |
357 (incf i))) | |
358 sum)) | |
359 | |
360 (defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) st… | |
361 (declare #.*fixnum-optimize-settings*) | |
362 (declare (fixnum start end) (string string)) | |
363 (let ((sum 0) | |
364 (i start)) | |
365 (declare (fixnum i sum)) | |
366 (loop | |
367 (when (>= i end) | |
368 (return)) | |
369 (let* ((char-code (char-code (char string i))) | |
370 (char-length (cond ((= char-code #.(char-code #\Newline)) 2) | |
371 ((< char-code #x80) 1) | |
372 ((< char-code #x800) 2) | |
373 ((< char-code #x10000) 3) | |
374 (t 4)))) | |
375 (declare (fixnum char-length) (type char-code-integer char-code)) | |
376 (incf sum char-length) | |
377 (incf i))) | |
378 sum)) | |
379 | |
380 (defmethod compute-number-of-octets ((format flexi-utf-16-format) string… | |
381 (declare #.*fixnum-optimize-settings*) | |
382 (declare (fixnum start end) (string string)) | |
383 (let ((sum 0) | |
384 (i start)) | |
385 (declare (fixnum i sum)) | |
386 (loop | |
387 (when (>= i end) | |
388 (return)) | |
389 (let* ((char-code (char-code (char string i))) | |
390 (char-length (cond ((< char-code #x10000) 2) | |
391 (t 4)))) | |
392 (declare (fixnum char-length) (type char-code-integer char-code)) | |
393 (incf sum char-length) | |
394 (incf i))) | |
395 sum)) | |
396 | |
397 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format… | |
398 (declare #.*fixnum-optimize-settings*) | |
399 (declare (fixnum start end) (string string)) | |
400 (let ((sum 0) | |
401 (i start)) | |
402 (declare (fixnum i sum)) | |
403 (loop | |
404 (when (>= i end) | |
405 (return)) | |
406 (let* ((char-code (char-code (char string i))) | |
407 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) | |
408 ((< char-code #x10000) 2) | |
409 (t 4)))) | |
410 (declare (fixnum char-length) (type char-code-integer char-code)) | |
411 (incf sum char-length) | |
412 (incf i))) | |
413 sum)) | |
414 | |
415 (defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format… | |
416 (declare #.*fixnum-optimize-settings*) | |
417 (declare (fixnum start end) (string string)) | |
418 (let ((sum 0) | |
419 (i start)) | |
420 (declare (fixnum i sum)) | |
421 (loop | |
422 (when (>= i end) | |
423 (return)) | |
424 (let* ((char-code (char-code (char string i))) | |
425 (char-length (cond ((= char-code #.(char-code #\Newline)) 4) | |
426 ((< char-code #x10000) 2) | |
427 (t 4)))) | |
428 (declare (fixnum char-length) (type char-code-integer char-code)) | |
429 (incf sum char-length) | |
430 (incf i))) | |
431 sum)) | |
432 | |
433 (defmethod compute-number-of-octets ((format flexi-utf-32-format) string… | |
434 (declare #.*fixnum-optimize-settings*) | |
435 (declare (fixnum start end)) | |
436 (declare (ignore string)) | |
437 (* 4 (- end start))) | |
438 | |
439 (defmethod compute-number-of-octets ((format flexi-crlf-mixin) string st… | |
440 (declare #.*fixnum-optimize-settings*) | |
441 (declare (fixnum start end) (string string)) | |
442 (+ (call-next-method) | |
443 (* (case (external-format-name format) | |
444 (:utf-32 4) | |
445 (otherwise 1)) | |
446 (count #\Newline string :start start :end end :test #'char=)))) | |
447 | |
448 (defgeneric character-length (format char) | |
449 (declare #.*fixnum-optimize-settings*) | |
450 (:documentation "Returns the number of octets needed to encode the | |
451 single character CHAR.") | |
452 (:method (format char) | |
453 (compute-number-of-octets format (string char) 0 1))) | |
454 | |
455 (defmethod character-length :around ((format flexi-crlf-mixin) (char (eq… | |
456 (declare #.*fixnum-optimize-settings*) | |
457 (+ (call-next-method format +cr+) | |
458 (call-next-method format +lf+))) | |
459 | |
460 (defmethod character-length ((format flexi-8-bit-format) char) | |
461 (declare #.*fixnum-optimize-settings*) | |
462 (declare (ignore char)) | |
463 1) | |
464 | |
465 (defmethod character-length ((format flexi-utf-32-format) char) | |
466 (declare #.*fixnum-optimize-settings*) | |
467 (declare (ignore char)) | |
468 |