| tencode.lisp - clic - Clic is an command line interactive client for gopher wri… | |
| git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/ | |
| Log | |
| Files | |
| Refs | |
| Tags | |
| LICENSE | |
| --- | |
| tencode.lisp (14245B) | |
| --- | |
| 1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 1… | |
| 2 ;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.26 2008/05/… | |
| 3 | |
| 4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. | |
| 5 | |
| 6 ;;; Redistribution and use in source and binary forms, with or without | |
| 7 ;;; modification, are permitted provided that the following conditions | |
| 8 ;;; are met: | |
| 9 | |
| 10 ;;; * Redistributions of source code must retain the above copyright | |
| 11 ;;; notice, this list of conditions and the following disclaimer. | |
| 12 | |
| 13 ;;; * Redistributions in binary form must reproduce the above | |
| 14 ;;; copyright notice, this list of conditions and the following | |
| 15 ;;; disclaimer in the documentation and/or other materials | |
| 16 ;;; provided with the distribution. | |
| 17 | |
| 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED | |
| 19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
| 22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | |
| 24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
| 26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 | |
| 30 (in-package :flexi-streams) | |
| 31 | |
| 32 (defgeneric char-to-octets (format char writer) | |
| 33 (declare #.*standard-optimize-settings*) | |
| 34 (:documentation "Converts the character CHAR to a sequence of octets | |
| 35 using the external format FORMAT. The conversion is performed by | |
| 36 calling the unary function \(which must be a functional object) WRITER | |
| 37 repeatedly each octet. The return value of this function is | |
| 38 unspecified.")) | |
| 39 | |
| 40 (defgeneric write-sequence* (format stream sequence start end) | |
| 41 (declare #.*standard-optimize-settings*) | |
| 42 (:documentation "A generic function which dispatches on the external | |
| 43 format and does the real work for STREAM-WRITE-SEQUENCE.")) | |
| 44 | |
| 45 (defgeneric string-to-octets* (format string start end) | |
| 46 (declare #.*standard-optimize-settings*) | |
| 47 (:documentation "A generic function which dispatches on the external | |
| 48 format and does the real work for STRING-TO-OCTETS.")) | |
| 49 | |
| 50 (defmethod string-to-octets* :around (format (list list) start end) | |
| 51 (declare #.*standard-optimize-settings*) | |
| 52 (string-to-octets* format (coerce list 'string*) start end)) | |
| 53 | |
| 54 (defmacro define-sequence-writers ((format-class) &body body) | |
| 55 "Non-hygienic utility macro which defines methods for | |
| 56 WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For | |
| 57 BODY see the docstring of DEFINE-CHAR-ENCODERS." | |
| 58 (let ((body `((locally | |
| 59 (declare #.*fixnum-optimize-settings*) | |
| 60 ,@body)))) | |
| 61 `(progn | |
| 62 (defmethod string-to-octets* ((format ,format-class) string start… | |
| 63 (declare #.*standard-optimize-settings*) | |
| 64 (declare (fixnum start end) (string string)) | |
| 65 (let ((octets (make-array (compute-number-of-octets format stri… | |
| 66 :element-type 'octet)) | |
| 67 (j 0)) | |
| 68 (declare (fixnum j)) | |
| 69 (loop for i of-type fixnum from start below end do | |
| 70 (macrolet ((octet-writer (form) | |
| 71 `(progn | |
| 72 (setf (aref (the (array octet *) octets… | |
| 73 (incf j)))) | |
| 74 (symbol-macrolet ((char-getter (char string i))) | |
| 75 (progn ,@body)))) | |
| 76 octets)) | |
| 77 (defmethod write-sequence* ((format ,format-class) stream sequenc… | |
| 78 (declare #.*standard-optimize-settings*) | |
| 79 (declare (fixnum start end)) | |
| 80 (with-accessors ((column flexi-stream-column)) | |
| 81 stream | |
| 82 (let* ((octet-seen-p nil) | |
| 83 (buffer-pos 0) | |
| 84 ;; estimate should be good enough... | |
| 85 (factor (encoding-factor format)) | |
| 86 ;; we don't want arbitrarily large buffer, do we? | |
| 87 (buffer-size (min +buffer-size+ (ceiling (* factor (- … | |
| 88 (buffer (make-octet-buffer buffer-size)) | |
| 89 (underlying-stream (flexi-stream-stream stream))) | |
| 90 (declare (fixnum buffer-pos buffer-size) | |
| 91 (boolean octet-seen-p) | |
| 92 (type (array octet *) buffer)) | |
| 93 (macrolet ((octet-writer (form) | |
| 94 `(write-octet ,form))) | |
| 95 (labels ((flush-buffer () | |
| 96 "Sends all octets in BUFFER to the underlying … | |
| 97 (write-sequence buffer underlying-stream :end … | |
| 98 (setq buffer-pos 0)) | |
| 99 (write-octet (octet) | |
| 100 "Adds one octet to the buffer and flushes it i… | |
| 101 (declare (type octet octet)) | |
| 102 (when (>= buffer-pos buffer-size) | |
| 103 (flush-buffer)) | |
| 104 (setf (aref buffer buffer-pos) octet) | |
| 105 (incf buffer-pos)) | |
| 106 (write-object (object) | |
| 107 "Dispatches to WRITE-OCTET or WRITE-CHARACTER | |
| 108 depending on the type of OBJECT." | |
| 109 (etypecase object | |
| 110 (octet (setq octet-seen-p t) | |
| 111 (write-octet object)) | |
| 112 (character (symbol-macrolet ((char-getter ob… | |
| 113 ,@body))))) | |
| 114 (macrolet ((iterate (&body output-forms) | |
| 115 "An unhygienic macro to implement the actu… | |
| 116 iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one | |
| 117 sequence element and put its octet representation into the buffer." | |
| 118 `(loop for index of-type fixnum from start… | |
| 119 do (progn ,@output-forms) | |
| 120 finally (when (plusp buffer-pos) | |
| 121 (flush-buffer))))) | |
| 122 (etypecase sequence | |
| 123 (string (iterate | |
| 124 (symbol-macrolet ((char-getter (char seque… | |
| 125 ,@body))) | |
| 126 (array (iterate | |
| 127 (symbol-macrolet ((char-getter (aref sequen… | |
| 128 ,@body))) | |
| 129 (list (iterate (write-object (nth index sequence))… | |
| 130 ;; update the column slot, setting it to NIL if we sent | |
| 131 ;; octets | |
| 132 (setq column | |
| 133 (cond (octet-seen-p nil) | |
| 134 (t (let ((last-newline-pos (position #\Newl… | |
| 135 :test … | |
| 136 :start… | |
| 137 :end e… | |
| 138 :from-… | |
| 139 (cond (last-newline-pos (- end last-ne… | |
| 140 (column (+ column (- end start))… | |
| 141 | |
| 142 (defmacro define-char-encoders ((lf-format-class cr-format-class crlf-fo… | |
| 143 "Non-hygienic utility macro which defines several encoding-related | |
| 144 methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and | |
| 145 CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same | |
| 146 encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and | |
| 147 similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. | |
| 148 BODY is a code template for the code to convert one character to | |
| 149 octets. BODY must contain a symbol CHAR-GETTER representing the form | |
| 150 which is used to obtain the character and a forms like \(OCTET-WRITE | |
| 151 <thing>) to write the octet <thing>. The CHAR-GETTER form might be | |
| 152 called more than once." | |
| 153 `(progn | |
| 154 (defmethod char-to-octets ((format ,lf-format-class) char writer) | |
| 155 (declare #.*fixnum-optimize-settings*) | |
| 156 (declare (character char) (function writer)) | |
| 157 (symbol-macrolet ((char-getter char)) | |
| 158 (macrolet ((octet-writer (form) | |
| 159 `(funcall writer ,form))) | |
| 160 ,@body))) | |
| 161 (define-sequence-writers (,lf-format-class) ,@body) | |
| 162 (define-sequence-writers (,cr-format-class) | |
| 163 ;; modify the body so that the getter replaces a #\Newline | |
| 164 ;; with a #\Return | |
| 165 ,@(sublis `((char-getter . ,(with-unique-names (char) | |
| 166 `(let ((,char char-getter)) | |
| 167 (declare (character ,char)) | |
| 168 (if (char= ,char #\Newline) | |
| 169 #\Return | |
| 170 ,char))))) | |
| 171 body)) | |
| 172 (define-sequence-writers (,crlf-format-class) | |
| 173 ;; modify the body so that we potentially write octets for | |
| 174 ;; two characters (#\Return and #\Linefeed) - the original | |
| 175 ;; body is wrapped with the WRITE-CHAR local function | |
| 176 ,(with-unique-names (char write-char) | |
| 177 `(flet ((,write-char (,char) | |
| 178 ,@(sublis `((char-getter . ,char)) body))) | |
| 179 (let ((,char char-getter)) | |
| 180 (declare (character ,char)) | |
| 181 (cond ((char= ,char #\Newline) | |
| 182 (,write-char #\Return) | |
| 183 (,write-char #\Linefeed)) | |
| 184 (t (,write-char ,char))))))))) | |
| 185 | |
| 186 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format fle… | |
| 187 (let ((octet (char-code char-getter))) | |
| 188 (when (> octet 255) | |
| 189 (signal-encoding-error format "~S (code ~A) is not a LATIN-1 chara… | |
| 190 (octet-writer octet))) | |
| 191 | |
| 192 (define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-cr… | |
| 193 (let ((octet (char-code char-getter))) | |
| 194 (when (> octet 127) | |
| 195 (signal-encoding-error format "~S (code ~A) is not an ASCII charac… | |
| 196 (octet-writer octet))) | |
| 197 | |
| 198 (define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-cr… | |
| 199 (with-accessors ((encoding-hash external-format-encoding-hash)) | |
| 200 format | |
| 201 (let ((octet (gethash (char-code char-getter) encoding-hash))) | |
| 202 (unless octet | |
| 203 (signal-encoding-error format "~S (code ~A) is not in this encod… | |
| 204 (octet-writer octet)))) | |
| 205 | |
| 206 (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-cr… | |
| 207 ;; the old version using LDB was more elegant, but some Lisps had | |
| 208 ;; trouble optimizing it | |
| 209 (let ((char-code (char-code char-getter))) | |
| 210 (tagbody | |
| 211 (cond ((< char-code #x80) | |
| 212 (octet-writer char-code) | |
| 213 (go zero)) | |
| 214 ((< char-code #x800) | |
| 215 (octet-writer (logior* #b11000000 (ash* char-code -6))) | |
| 216 (go one)) | |
| 217 ((< char-code #x10000) | |
| 218 (octet-writer (logior* #b11100000 (ash* char-code -12))) | |
| 219 (go two)) | |
| 220 (t | |
| 221 (octet-writer (logior* #b11110000 (ash* char-code -18))))) | |
| 222 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-co… | |
| 223 two | |
| 224 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-co… | |
| 225 one | |
| 226 (octet-writer (logior* #b10000000 (logand* #b00111111 char-code))) | |
| 227 zero))) | |
| 228 | |
| 229 (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format … | |
| 230 (flet ((write-word (word) | |
| 231 (octet-writer (logand* #x00ff word)) | |
| 232 (octet-writer (ash* (logand* #xff00 word) -8)))) | |
| 233 (declare (inline write-word)) | |
| 234 (let ((char-code (char-code char-getter))) | |
| 235 (declare (type char-code-integer char-code)) | |
| 236 (cond ((< char-code #x10000) | |
| 237 (write-word char-code)) | |
| 238 (t (decf char-code #x10000) | |
| 239 (write-word (logior* #xd800 (ash* char-code -10))) | |
| 240 (write-word (logior* #xdc00 (logand* #x03ff char-code))))… | |
| 241 | |
| 242 (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format … | |
| 243 (flet ((write-word (word) | |
| 244 (octet-writer (ash* (logand* #xff00 word) -8)) | |
| 245 (octet-writer (logand* #x00ff word)))) | |
| 246 (declare (inline write-word)) | |
| 247 (let ((char-code (char-code char-getter))) | |
| 248 (declare (type char-code-integer char-code)) | |
| 249 (cond ((< char-code #x10000) | |
| 250 (write-word char-code)) | |
| 251 (t (decf char-code #x10000) | |
| 252 (write-word (logior* #xd800 (ash* char-code -10))) | |
| 253 (write-word (logior* #xdc00 (logand* #x03ff char-code))))… | |
| 254 | |
| 255 (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format … | |
| 256 (let ((char-code (char-code char-getter))) | |
| 257 (octet-writer (logand* #x00ff char-code)) | |
| 258 (octet-writer (logand* #x00ff (ash* char-code -8))) | |
| 259 (octet-writer (logand* #x00ff (ash* char-code -16))) | |
| 260 (octet-writer (logand* #x00ff (ash* char-code -24))))) | |
| 261 | |
| 262 (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format … | |
| 263 (let ((char-code (char-code char-getter))) | |
| 264 (octet-writer (logand* #x00ff (ash* char-code -24))) | |
| 265 (octet-writer (logand* #x00ff (ash* char-code -16))) | |
| 266 (octet-writer (logand* #x00ff (ash* char-code -8))) | |
| 267 (octet-writer (logand* #x00ff char-code)))) | |
| 268 | |
| 269 (defmethod char-to-octets ((format flexi-cr-mixin) char writer) | |
| 270 (declare #.*fixnum-optimize-settings*) | |
| 271 (declare (character char)) | |
| 272 (if (char= char #\Newline) | |
| 273 (call-next-method format #\Return writer) | |
| 274 (call-next-method))) | |
| 275 | |
| 276 (defmethod char-to-octets ((format flexi-crlf-mixin) char writer) | |
| 277 (declare #.*fixnum-optimize-settings*) | |
| 278 (declare (character char)) | |
| 279 (cond ((char= char #\Newline) | |
| 280 (call-next-method format #\Return writer) | |
| 281 (call-next-method format #\Linefeed writer)) | |
| 282 (t (call-next-method)))) |