external-format.lisp - clic - Clic is an command line interactive client for go… | |
git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65… | |
Log | |
Files | |
Refs | |
Tags | |
README | |
LICENSE | |
--- | |
external-format.lisp (17187B) | |
--- | |
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 1… | |
2 ;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.24… | |
3 | |
4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. | |
5 | |
6 ;;; Redistribution and use in source and binary forms, with or without | |
7 ;;; modification, are permitted provided that the following conditions | |
8 ;;; are met: | |
9 | |
10 ;;; * Redistributions of source code must retain the above copyright | |
11 ;;; notice, this list of conditions and the following disclaimer. | |
12 | |
13 ;;; * Redistributions in binary form must reproduce the above | |
14 ;;; copyright notice, this list of conditions and the following | |
15 ;;; disclaimer in the documentation and/or other materials | |
16 ;;; provided with the distribution. | |
17 | |
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED | |
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | |
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | |
30 (in-package :flexi-streams) | |
31 | |
32 (defclass external-format () | |
33 ((name :initarg :name | |
34 :reader external-format-name | |
35 :documentation "The name of the external format - a | |
36 keyword.") | |
37 (id :initarg :id | |
38 :initform nil | |
39 :reader external-format-id | |
40 :documentation "If the external format denotes a Windows | |
41 code page this ID specifies which one to use. Otherwise the | |
42 value is ignored \(and usually NIL).") | |
43 (little-endian :initarg :little-endian | |
44 :initform *default-little-endian* | |
45 :reader external-format-little-endian | |
46 :documentation "Whether multi-octet values are | |
47 read and written with the least significant octet first. For | |
48 8-bit encodings like :ISO-8859-1 this value is ignored.") | |
49 (eol-style :initarg :eol-style | |
50 :reader external-format-eol-style | |
51 :documentation "The character\(s) to or from which | |
52 a #\Newline will be translated - one of the keywords :CR, :LF, | |
53 or :CRLF.")) | |
54 (:documentation "EXTERNAL-FORMAT objects are used to denote | |
55 encodings for flexi streams or for the string functions defined in | |
56 strings.lisp.")) | |
57 | |
58 (defmethod make-load-form ((thing external-format) &optional environment) | |
59 "Defines a way to reconstruct external formats. Needed for OpenMCL." | |
60 (make-load-form-saving-slots thing :environment environment)) | |
61 | |
62 (defclass flexi-cr-mixin () | |
63 () | |
64 (:documentation "A mixin for external-formats where the end-of-line | |
65 designator is #\Return.")) | |
66 | |
67 (defclass flexi-crlf-mixin () | |
68 () | |
69 (:documentation "A mixin for external-formats where the end-of-line | |
70 designator is the sequence #\Return #\Linefeed.")) | |
71 | |
72 (defclass flexi-8-bit-format (external-format) | |
73 ((encoding-hash :accessor external-format-encoding-hash) | |
74 (decoding-table :accessor external-format-decoding-table)) | |
75 (:documentation "The class for all flexi streams which use an 8-bit | |
76 encoding and thus need additional slots for the encoding/decoding | |
77 tables.")) | |
78 | |
79 (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) | |
80 () | |
81 (:documentation "Special class for external formats which use an | |
82 8-bit encoding /and/ have #\Return as the line-end character.")) | |
83 | |
84 (defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format) | |
85 () | |
86 (:documentation "Special class for external formats which use an | |
87 8-bit encoding /and/ have the sequence #\Return #\Linefeed as the | |
88 line-end character.")) | |
89 | |
90 (defclass flexi-ascii-format (flexi-8-bit-format) | |
91 () | |
92 (:documentation "Special class for external formats which use the | |
93 US-ASCII encoding.")) | |
94 | |
95 (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) | |
96 () | |
97 (:documentation "Special class for external formats which use the | |
98 US-ASCII encoding /and/ have #\Return as the line-end character.")) | |
99 | |
100 (defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format) | |
101 () | |
102 (:documentation "Special class for external formats which use the | |
103 US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the | |
104 line-end character.")) | |
105 | |
106 (defclass flexi-latin-1-format (flexi-8-bit-format) | |
107 () | |
108 (:documentation "Special class for external formats which use the | |
109 ISO-8859-1 encoding.")) | |
110 | |
111 (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) | |
112 () | |
113 (:documentation "Special class for external formats which use the | |
114 ISO-8859-1 encoding /and/ have #\Return as the line-end character.")) | |
115 | |
116 (defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-form… | |
117 () | |
118 (:documentation "Special class for external formats which use the | |
119 ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the | |
120 line-end character.")) | |
121 | |
122 (defclass flexi-utf-32-format (external-format) | |
123 () | |
124 (:documentation "Abstract class for external formats which use the | |
125 UTF-32 encoding.")) | |
126 | |
127 (defclass flexi-utf-32-le-format (flexi-utf-32-format) | |
128 () | |
129 (:documentation "Special class for external formats which use the | |
130 UTF-32 encoding with little-endian byte ordering.")) | |
131 | |
132 (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-form… | |
133 () | |
134 (:documentation "Special class for external formats which use the | |
135 UTF-32 encoding with little-endian byte ordering /and/ have #\Return | |
136 as the line-end character.")) | |
137 | |
138 (defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-… | |
139 () | |
140 (:documentation "Special class for external formats which use the | |
141 UTF-32 encoding with little-endian byte ordering /and/ have the | |
142 sequence #\Return #\Linefeed as the line-end character.")) | |
143 | |
144 (defclass flexi-utf-32-be-format (flexi-utf-32-format) | |
145 () | |
146 (:documentation "Special class for external formats which use the | |
147 UTF-32 encoding with big-endian byte ordering.")) | |
148 | |
149 (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-form… | |
150 () | |
151 (:documentation "Special class for external formats which use the | |
152 UTF-32 encoding with big-endian byte ordering /and/ have #\Return as | |
153 the line-end character.")) | |
154 | |
155 (defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-… | |
156 () | |
157 (:documentation "Special class for external formats which use the | |
158 the UTF-32 encoding with big-endian byte ordering /and/ have the | |
159 sequence #\Return #\Linefeed as the line-end character.")) | |
160 | |
161 (defclass flexi-utf-16-format (external-format) | |
162 () | |
163 (:documentation "Abstract class for external formats which use the | |
164 UTF-16 encoding.")) | |
165 | |
166 (defclass flexi-utf-16-le-format (flexi-utf-16-format) | |
167 () | |
168 (:documentation "Special class for external formats which use the | |
169 UTF-16 encoding with little-endian byte ordering.")) | |
170 | |
171 (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-form… | |
172 () | |
173 (:documentation "Special class for external formats which use the | |
174 UTF-16 encoding with little-endian byte ordering /and/ have #\Return | |
175 as the line-end character.")) | |
176 | |
177 (defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-… | |
178 () | |
179 (:documentation "Special class for external formats which use the | |
180 UTF-16 encoding with little-endian byte ordering /and/ have the | |
181 sequence #\Return #\Linefeed as the line-end character.")) | |
182 | |
183 (defclass flexi-utf-16-be-format (flexi-utf-16-format) | |
184 () | |
185 (:documentation "Special class for external formats which use the | |
186 UTF-16 encoding with big-endian byte ordering.")) | |
187 | |
188 (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-form… | |
189 () | |
190 (:documentation "Special class for external formats which use the | |
191 UTF-16 encoding with big-endian byte ordering /and/ have #\Return as | |
192 the line-end character.")) | |
193 | |
194 (defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-… | |
195 () | |
196 (:documentation "Special class for external formats which use the | |
197 UTF-16 encoding with big-endian byte ordering /and/ have the sequence | |
198 #\Return #\Linefeed as the line-end character.")) | |
199 | |
200 (defclass flexi-utf-8-format (external-format) | |
201 () | |
202 (:documentation "Special class for external formats which use the | |
203 UTF-8 encoding.")) | |
204 | |
205 (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) | |
206 () | |
207 (:documentation "Special class for external formats which use the | |
208 UTF-8 encoding /and/ have #\Return as the line-end character.")) | |
209 | |
210 (defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format) | |
211 () | |
212 (:documentation "Special class for external formats which use the | |
213 UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the | |
214 line-end character.")) | |
215 | |
216 (defmethod initialize-instance :after ((external-format flexi-8-bit-form… | |
217 "Sets the fixed encoding/decoding tables for this particular | |
218 external format." | |
219 (declare #.*standard-optimize-settings*) | |
220 (declare (ignore initargs)) | |
221 (with-accessors ((encoding-hash external-format-encoding-hash) | |
222 (decoding-table external-format-decoding-table) | |
223 (name external-format-name) | |
224 (id external-format-id)) | |
225 external-format | |
226 (multiple-value-setq (encoding-hash decoding-table) | |
227 (cond ((ascii-name-p name) | |
228 (values +ascii-hash+ +ascii-table+)) | |
229 ((koi8-r-name-p name) | |
230 (values +koi8-r-hash+ +koi8-r-table+)) | |
231 ((iso-8859-name-p name) | |
232 (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) … | |
233 (cdr (assoc name +iso-8859-tables+ :test #'eq)))) | |
234 ((code-page-name-p name) | |
235 (values (cdr (assoc id +code-page-hashes+)) … | |
236 (cdr (assoc id +code-page-tables+)))))))) | |
237 | |
238 (defun external-format-class-name (real-name &key eol-style little-endia… | |
239 "Given the initargs for a general external format returns the name | |
240 \(a symbol) of the most specific subclass matching these arguments." | |
241 (declare #.*standard-optimize-settings*) | |
242 (declare (ignore id)) | |
243 (cond ((ascii-name-p real-name) | |
244 (ecase eol-style | |
245 (:lf 'flexi-ascii-format) | |
246 (:cr 'flexi-cr-ascii-format) | |
247 (:crlf 'flexi-crlf-ascii-format))) | |
248 ((eq real-name :iso-8859-1) | |
249 (ecase eol-style | |
250 (:lf 'flexi-latin-1-format) | |
251 (:cr 'flexi-cr-latin-1-format) | |
252 (:crlf 'flexi-crlf-latin-1-format))) | |
253 ((or (koi8-r-name-p real-name) | |
254 (iso-8859-name-p real-name) | |
255 (code-page-name-p real-name)) | |
256 (ecase eol-style | |
257 (:lf 'flexi-8-bit-format) | |
258 (:cr 'flexi-cr-8-bit-format) | |
259 (:crlf 'flexi-crlf-8-bit-format))) | |
260 (t (ecase real-name | |
261 (:utf-8 (ecase eol-style | |
262 (:lf 'flexi-utf-8-format) | |
263 (:cr 'flexi-cr-utf-8-format) | |
264 (:crlf 'flexi-crlf-utf-8-format))) | |
265 (:utf-16 (ecase eol-style | |
266 (:lf (if little-endian | |
267 'flexi-utf-16-le-format | |
268 'flexi-utf-16-be-format)) | |
269 (:cr (if little-endian | |
270 'flexi-cr-utf-16-le-format | |
271 'flexi-cr-utf-16-be-format)) | |
272 (:crlf (if little-endian | |
273 'flexi-crlf-utf-16-le-format | |
274 'flexi-crlf-utf-16-be-format)))) | |
275 (:utf-32 (ecase eol-style | |
276 (:lf (if little-endian | |
277 'flexi-utf-32-le-format | |
278 'flexi-utf-32-be-format)) | |
279 (:cr (if little-endian | |
280 'flexi-cr-utf-32-le-format | |
281 'flexi-cr-utf-32-be-format)) | |
282 (:crlf (if little-endian | |
283 'flexi-crlf-utf-32-le-format | |
284 'flexi-crlf-utf-32-be-format)))))))) | |
285 | |
286 (defun make-external-format% (name &key (little-endian *default-little-e… | |
287 id eol-style) | |
288 "Used internally by MAKE-EXTERNAL-FORMAT to default some of the | |
289 keywords arguments and to determine the right subclass of | |
290 EXTERNAL-FORMAT." | |
291 (declare #.*standard-optimize-settings*) | |
292 (let* ((real-name (normalize-external-format-name name)) | |
293 (initargs | |
294 (cond ((or (iso-8859-name-p real-name) | |
295 (koi8-r-name-p real-name) | |
296 (ascii-name-p real-name)) | |
297 (list :eol-style (or eol-style *default-eol-style*))) | |
298 ((code-page-name-p real-name) | |
299 (list :id (or (known-code-page-id-p id) | |
300 (error 'external-format-error | |
301 :format-control "Unknown code page… | |
302 :format-arguments (list id))) | |
303 ;; default EOL style for Windows code pages is :C… | |
304 :eol-style (or eol-style :crlf))) | |
305 (t (list :eol-style (or eol-style *default-eol-style*) | |
306 :little-endian little-endian))))) | |
307 (apply #'make-instance (apply #'external-format-class-name real-name… | |
308 :name real-name | |
309 initargs))) | |
310 | |
311 (defun make-external-format (name &rest args | |
312 &key (little-endian *default-little-en… | |
313 id eol-style) | |
314 "Creates and returns an external format object as specified. | |
315 NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies | |
316 the `endianess' of the external format and is ignored for 8-bit | |
317 encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF | |
318 which denote the end-of-line character \(sequence), ID is the ID | |
319 of a Windows code page \(and ignored for other encodings)." | |
320 (declare #.*standard-optimize-settings*) | |
321 ;; the keyword arguments are only there for arglist display in the IDE | |
322 (declare (ignore id little-endian)) | |
323 (let ((shortcut-args (cdr (assoc name +shortcut-map+ :test #'string-eq… | |
324 (cond (shortcut-args | |
325 (apply #'make-external-format% | |
326 (append shortcut-args | |
327 `(:eol-style ,eol-style)))) | |
328 (t (apply #'make-external-format% name args))))) | |
329 | |
330 (defun maybe-convert-external-format (external-format) | |
331 "Given an external format designator \(a keyword, a list, or an | |
332 EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT | |
333 object." | |
334 (declare #.*standard-optimize-settings*) | |
335 (typecase external-format | |
336 (symbol (make-external-format external-format)) | |
337 (list (apply #'make-external-format external-format)) | |
338 (otherwise external-format))) | |
339 | |
340 (defun external-format-equal (ef1 ef2) | |
341 "Checks whether two EXTERNAL-FORMAT objects denote the same encoding." | |
342 (declare #.*standard-optimize-settings*) | |
343 (let* ((name1 (external-format-name ef1)) | |
344 (code-page-name-p (code-page-name-p name1))) | |
345 ;; they must habe the same canonical name | |
346 (and (eq name1 | |
347 (external-format-name ef2)) | |
348 ;; if both are code pages the IDs must be the same | |
349 (or (not code-page-name-p) | |
350 (eql (external-format-id ef1) | |
351 (external-format-id ef2))) | |
352 ;; for non-8-bit encodings the endianess must be the same | |
353 (or code-page-name-p | |
354 (ascii-name-p name1) | |
355 (koi8-r-name-p name1) | |
356 (iso-8859-name-p name1) | |
357 (eq name1 :utf-8) | |
358 (eq (not (external-format-little-endian ef1)) | |
359 (not (external-format-little-endian ef2)))) | |
360 ;; the EOL style must also be the same | |
361 (eq (external-format-eol-style ef1) | |
362 (external-format-eol-style ef2))))) | |
363 | |
364 (defun normalize-external-format (external-format) | |
365 "Returns a list which is a `normalized' representation of the | |
366 external format EXTERNAL-FORMAT. Used internally by PRINT-OBJECT, for | |
367 example. Basically, the result is an argument list that can be fed | |
368 back to MAKE-EXTERNAL-FORMAT to create an equivalent object." | |
369 (declare #.*standard-optimize-settings*) | |
370 (let ((name (external-format-name external-format)) | |
371 (eol-style (external-format-eol-style external-format))) | |
372 (cond ((or (ascii-name-p name) | |
373 (koi8-r-name-p name) | |
374 (iso-8859-name-p name) | |
375 (eq name :utf-8)) | |
376 (list name :eol-style eol-style)) | |
377 ((code-page-name-p name) | |
378 (list name | |
379 :id (external-format-id external-format) | |
380 :eol-style eol-style)) | |
381 (t (list name | |
382 :eol-style eol-style | |
383 :little-endian (external-format-little-endian externa… | |
384 | |
385 (defmethod print-object ((object external-format) stream) | |
386 "How an EXTERNAL-FORMAT object is rendered. Uses | |
387 NORMALIZE-EXTERNAL-FORMAT." | |
388 (print-unreadable-object (object stream :type t :identity t) | |
389 (prin1 (normalize-external-format object) stream))) |