md-printlinks.c - sites - public wiki contents of suckless.org | |
git clone git://git.suckless.org/sites | |
Log | |
Files | |
Refs | |
--- | |
md-printlinks.c (9757B) | |
--- | |
1 /* process Markdown (based on smu code), but only output links */ | |
2 #include <sys/types.h> | |
3 | |
4 #include <ctype.h> | |
5 #include <errno.h> | |
6 #include <stdarg.h> | |
7 #include <stdio.h> | |
8 #include <stdint.h> | |
9 #include <stdlib.h> | |
10 #include <string.h> | |
11 | |
12 #ifdef __OpenBSD__ | |
13 #include <unistd.h> | |
14 #else | |
15 #define pledge(a,b) 0 | |
16 #endif | |
17 | |
18 #define READ_BUF_SIZ 16384 | |
/*
 * Number of elements in the array x. x must be a real array, not a pointer.
 * The argument and the whole expansion are parenthesized so the macro keeps
 * its meaning inside larger expressions such as `n / LEN(a)`.
 */
#define LEN(x) (sizeof(x) / sizeof((x)[0]))
20 #define ADDC(b,i) if (i % READ_BUF_SIZ == 0) { b = realloc(b, (i + READ… | |
21 | |
/*
 * Common signature of the parsers stored in parsers[]. process() calls each
 * one as parser(p, end, newblock); a result of 0 means "no match", otherwise
 * process() advances by the absolute value, and a negative result sets
 * newblock for the text that follows.
 */
22 typedef int (*Parser)(const char *, const char *, int); | |
/* One markup rule, used by the lineprefix[], underline[] and surround[] tables. */
23 typedef struct { | |
/* marker text that is matched against the input */
24 char *search; | |
/* non-zero: the matched content is parsed again through process() */
25 int process; | |
/* HTML strings stored with each rule; no code visible in this listing reads them */
26 char *before, *after; | |
27 } Tag; | |
28 | |
29 static int dolineprefix(const char *begin, const char *end, int newblock… | |
30 static int dolink(const char *begin, const char *end, int newblock); … | |
31 static int dolist(const char *begin, const char *end, int newblock); … | |
32 static int doparagraph(const char *begin, const char *end, int newblock)… | |
33 static int doshortlink(const char *begin, const char *end, int newblock)… | |
34 static int dosurround(const char *begin, const char *end, int newblock);… | |
35 static int dounderline(const char *begin, const char *end, int newblock)… | |
36 static void *ereallocz(void *p, size_t size); | |
37 static void hprint(const char *begin, const char *end); … | |
38 static void process(const char *begin, const char *end, int newblock); … | |
39 | |
/*
 * process() tries these parsers in array order at every position and stops at
 * the first one that returns a non-zero value.
 * NOTE(review): the initializer line is cut off in this listing, so the tail
 * of the list is not visible here.
 */
40 /* list of parsers */ | |
41 static Parser parsers[] = { | |
42 dounderline, dolineprefix, dolist, doparagraph, dosurround, doli… | |
43 }; | |
44 | |
/*
 * Rules for blocks introduced by a marker at the start of each line; consumed
 * by dolineprefix(), which tries them in array order.
 * Fields: marker, process flag, and two HTML strings that are not read by the
 * code visible in this listing. A non-zero process flag makes dolineprefix()
 * pass the collected content to process(); a marker that ends in a newline is
 * consumed on its own without collecting content.
 * NOTE(review): whitespace inside the marker strings may have been collapsed
 * by the listing, and some entries are cut off at the right margin.
 */
45 static Tag lineprefix[] = { | |
46 { " ", 0, "<pre><code>", "</code></pre>" }, | |
47 { "\t", 0, "<pre><code>", "</code></pre>" … | |
48 { "> ", 2, "<blockquote>", "</block… | |
49 { "###### ", 1, "<h6>", "</h6>" }, | |
50 { "##### ", 1, "<h5>", "</h5>" }, | |
51 { "#### ", 1, "<h4>", "</h4>" }, | |
52 { "### ", 1, "<h3>", "</h3>" }, | |
53 { "## ", 1, "<h2>", "</h2>" }, | |
54 { "# ", 1, "<h1>", "</h1>" … | |
55 { "- - -\n", 1, "<hr/>", ""}, | |
56 }; | |
57 | |
/*
 * Rules for headings marked by a line of repeated characters underneath the
 * heading text; consumed by dounderline(), which tries them in array order.
 * Both entries have a non-zero process flag, so the heading text is parsed
 * again through process().
 * NOTE(review): both entries are cut off at the right margin in this listing.
 */
58 static Tag underline[] = { | |
59 { "=", 1, "<h1>", "</h1>\n"… | |
60 { "-", 1, "<h2>", "</h2>\n"… | |
61 }; | |
62 | |
/*
 * Rules for inline spans enclosed by the same marker on both sides; consumed
 * by dosurround(), which tries them in array order and returns on the first
 * rule that yields a span. Longer markers are listed before the shorter
 * markers they start with.
 * Entries with process == 0 (the backtick forms) have their content passed to
 * hprint(); all others are parsed again through process().
 */
63 static Tag surround[] = { | |
64 { "``", 0, "<code>", "</code>" }, | |
65 { "`", 0, "<code>", "</code>" }, | |
66 { "___", 1, "<b><i>", "</i></b>" }, | |
67 { "***", 1, "<b><i>", "</i></b>" }, | |
68 { "__", 1, "<b>", "</b>" }, | |
69 { "**", 1, "<b>", "</b>" }, | |
70 { "_", 1, "<i>", "</i>" }, | |
71 { "*", 1, "<i>", "</i>" }, | |
72 }; | |
73 | |
/*
 * Print a printf-style message to stderr and terminate with exit status 1.
 *
 * If the format string ends in ':', the strerror() text for the errno value
 * that was current when eprint() was entered is appended. A newline is always
 * written last. This function does not return.
 */
void
eprint(const char *format, ...)
{
	/* capture errno first: vfprintf() may change it before it is reported */
	int saved_errno = errno;
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	if (format[0] && format[strlen(format) - 1] == ':')
		fputs(strerror(saved_errno), stderr);
	fputc('\n', stderr);
	exit(1);
}
87 | |
/*
 * Parser for blocks whose lines start with one of the lineprefix[] markers.
 *
 * begin/end delimit the text to examine; newblock is non-zero at the start of
 * a block. Matching is attempted at begin when newblock is set, or just after
 * a leading newline otherwise; in any other position 0 is returned.
 *
 * Returns 0 when no marker matches, the marker length when the marker itself
 * ends in a newline, and otherwise the negated distance p - begin once the
 * content has been collected (and passed to process() if the rule's process
 * field is non-zero).
 *
 * NOTE(review): two lines are cut off in this listing (the strncmp() applied
 * to the following line and the last argument of the process() call), so
 * their exact conditions are not documented here.
 */
88 int | |
89 dolineprefix(const char *begin, const char *end, int newblock) | |
90 { | |
91 unsigned int i, j, l; | |
92 char *buffer; | |
93 const char *p; | |
94 | |
95 if (newblock) | |
96 p = begin; | |
97 else if (*begin == '\n') | |
98 p = begin + 1; | |
99 else | |
100 return 0; | |
101 for (i = 0; i < LEN(lineprefix); i++) { | |
102 l = strlen(lineprefix[i].search); | |
/* not enough input left to hold this marker */
103 if (end - p < l) | |
104 continue; | |
105 if (strncmp(lineprefix[i].search, p, l)) | |
106 continue; | |
/* a marker that already includes its line end has no content to collect */
107 if (lineprefix[i].search[l-1] == '\n') { | |
108 return l; | |
109 } | |
/* NOTE(review): starts with BUFSIZ bytes; any growth is left to ADDC, whose definition is cut off in this listing */
110 if (!(buffer = malloc(BUFSIZ))) | |
111 eprint("malloc:"); | |
112 buffer[0] = '\0'; | |
113 | |
/* copy the text after the marker; j counts the bytes stored in buffer */
114 for (j = 0, p += l; p < end; p++, j++) { | |
115 ADDC(buffer, j) = *p; | |
/* at a line break with more than l bytes left, inspect the next line's prefix */
116 if (*p == '\n' && p + l < end) { | |
117 if (strncmp(lineprefix[i].search, p + 1,… | |
118 break; | |
/* skip the marker of the continuation line so it is not copied */
119 p += l; | |
120 } | |
121 } | |
122 | |
123 ADDC(buffer, j) = '\0'; | |
124 if (lineprefix[i].process) | |
125 process(buffer, buffer + strlen(buffer), linepre… | |
126 free(buffer); | |
/* negative: process() then treats the following text as a new block */
127 return -(p - begin); | |
128 } | |
129 return 0; | |
130 } | |
131 | |
/*
 * Parser for inline links: writes the link target followed by a newline to
 * stdout.
 *
 * Visible behaviour: a leading opening square bracket selects a plain link
 * (img = 0). Once the end of the description (descend) is known, the target
 * starts two bytes after it and runs up to the next closing parenthesis. If a
 * double or single quote occurs before that parenthesis it opens a title: the
 * target then ends before the quote (with trailing whitespace stripped) and a
 * closing quote of the same kind must follow before the parenthesis.
 * The target range link..linkend is written to stdout for images and plain
 * links alike; for plain links the description is additionally passed to
 * process(). Returns the number of bytes consumed, or 0 when the text is not
 * a link.
 *
 * NOTE(review): in this listing the lines that detect the image form and
 * locate the end of the description are garbled (text between string literals
 * is missing), and the whitespace-stripping loop is cut off; those parts are
 * not documented here.
 * NOTE(review): titleend is assigned but not read in the visible code.
 */
132 int | |
133 dolink(const char *begin, const char *end, int newblock) | |
134 { | |
135 int img, len, sep; | |
136 const char *desc, *link, *p, *q, *descend, *linkend; | |
137 const char *title = NULL, *titleend = NULL; | |
138 | |
139 if (*begin == '[') | |
140 img = 0; | |
141 else if (strncmp(begin, ") || p > end) | |
147 return 0; | |
148 for (q = strstr(desc, ") || p > end) | |
150 return 0; | |
151 descend = p; | |
152 link = p + 2; | |
153 if (!(q = strchr(link, ')')) || q > end) | |
154 return 0; | |
155 if ((p = strpbrk(link, "\"'")) && p < end && q > p) { | |
156 sep = p[0]; /* separator: can be " or ' */ | |
157 title = p + 1; | |
158 /* strip trailing whitespace */ | |
159 for (linkend = p; linkend > link && isspace(*(linkend - … | |
160 ; | |
161 if (!(p = strchr(title, sep)) || q > end || p > q) | |
162 return 0; | |
163 titleend = p; | |
164 len = p + 2 - begin; | |
165 } | |
166 else { | |
167 linkend = q; | |
168 len = q + 1 - begin; | |
169 } | |
170 if (img) { | |
171 fwrite(link, 1, linkend - link, stdout); | |
172 fputs("\n", stdout); | |
173 } | |
174 else { | |
175 fwrite(link, 1, linkend - link, stdout); | |
176 fputs("\n", stdout); | |
177 | |
178 process(desc, descend, 0); | |
179 } | |
180 return len; | |
181 } | |
182 | |
/*
 * Parser for list blocks.
 *
 * Matching is attempted at begin when newblock is set, or just after a
 * leading newline otherwise; in any other position 0 is returned.
 * A list starts either with one of the characters - * + (ul = 1) or with a
 * possibly empty run of digits followed by a dot (ul = 0). The marker must be
 * followed by a space or tab, otherwise 0 is returned. indent is the width of
 * the marker plus the whitespace that follows it.
 * Item text is accumulated in a heap buffer and handed to process(). The
 * return value is -(p - begin + 1), computed after p has been moved back over
 * trailing newlines.
 *
 * NOTE(review): several conditions inside the item loop are cut off in this
 * listing (the marker and indentation checks for the following line and the
 * last argument of the process() call); that logic is intentionally not
 * described here.
 * NOTE(review): the final backwards scan over newlines decrements p without
 * comparing it against begin; confirm it cannot step in front of the text.
 */
183 int | |
184 dolist(const char *begin, const char *end, int newblock) | |
185 { | |
186 unsigned int i, j, indent, run, ul, isblock; | |
187 const char *p, *q; | |
188 char *buffer = NULL; | |
189 | |
190 isblock = 0; | |
191 if (newblock) | |
192 p = begin; | |
193 else if (*begin == '\n') | |
194 p = begin + 1; | |
195 else | |
196 return 0; | |
/* q remembers where the marker starts so indent can be measured below */
197 q = p; | |
198 if (*p == '-' || *p == '*' || *p == '+') | |
199 ul = 1; | |
200 else { | |
201 ul = 0; | |
202 for (; p < end && *p >= '0' && *p <= '9'; p++) | |
203 ; | |
204 if (p >= end || *p != '.') | |
205 return 0; | |
206 } | |
207 p++; | |
208 if (p >= end || !(*p == ' ' || *p == '\t')) | |
209 return 0; | |
210 for (p++; p != end && (*p == ' ' || *p == '\t'); p++) | |
211 ; | |
212 indent = p - q; | |
213 buffer = ereallocz(buffer, BUFSIZ); | |
214 run = 1; | |
/* outer loop: one iteration per list item; inner loop: copy that item into buffer */
215 for (; p < end && run; p++) { | |
216 for (i = 0; p < end && run; p++, i++) { | |
217 if (*p == '\n') { | |
218 if (p + 1 == end) | |
219 break; | |
220 else if (p[1] == '\n') { | |
221 p++; | |
222 ADDC(buffer, i) = '\n'; | |
223 i++; | |
224 run = 0; | |
225 isblock++; | |
226 } | |
227 q = p + 1; | |
228 j = 0; | |
229 if (ul && (*q == '-' || *q == '*' || *q … | |
230 j = 1; | |
231 else if (!ul) { | |
232 for (; q + j != end && q[j] >= '… | |
233 ; | |
234 if (q + j == end) | |
235 break; | |
236 if (j > 0 && q[j] == '.') | |
237 j++; | |
238 else | |
239 j = 0; | |
240 } | |
241 if (q + indent < end) | |
242 for (; (q[j] == ' ' || q[j] == '… | |
243 ; | |
244 if (j == indent) { | |
245 ADDC(buffer, i) = '\n'; | |
246 i++; | |
247 p += indent; | |
248 run = 1; | |
249 if (*q == ' ' || *q == '\t') | |
250 p++; | |
251 else | |
252 break; | |
253 } | |
254 } | |
255 ADDC(buffer, i) = *p; | |
256 } | |
257 ADDC(buffer, i) = '\0'; | |
258 process(buffer, buffer + i, isblock > 1 || (isblock == 1… | |
259 } | |
260 free(buffer); | |
261 p--; | |
262 while (*(--p) == '\n') | |
263 ; | |
264 return -(p - begin + 1); | |
265 } | |
266 | |
/*
 * Parser for plain paragraphs.
 *
 * Applies only at the start of a block (newblock non-zero). The paragraph
 * runs from begin to the first blank line ("\n\n"), or to end when no blank
 * line is found before it. The paragraph text is parsed again through
 * process() with newblock cleared.
 *
 * Returns 0 when not at a block start or when the paragraph would be empty,
 * otherwise the negated paragraph length.
 */
int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *stop;

	if (newblock == 0)
		return 0;

	/* strstr() is not bounded by end, so a match beyond it is clamped */
	stop = strstr(begin, "\n\n");
	if (stop == NULL || stop > end)
		stop = end;

	if (stop == begin)
		return 0;

	process(begin, stop, 0);
	return -(stop - begin);
}
282 | |
/*
 * Parser for <...> autolinks.
 *
 * The text must start with '<'. The bytes up to end are scanned: a space, tab
 * or newline rejects the candidate; '#' or ':' marks the content as a URL;
 * '@' marks it as a mail address unless a mark was already set. At the
 * closing '>' an unmarked candidate is rejected, a URL is written to stdout
 * followed by a newline, and a mail address produces no output.
 *
 * Returns the number of bytes consumed including both angle brackets, or 0
 * when the text is not an autolink (including when no '>' occurs before end).
 * newblock is not used.
 */
int
doshortlink(const char *begin, const char *end, int newblock)
{
	enum { KIND_NONE = 0, KIND_MAIL = 1, KIND_URL = -1 };
	const char *cur;
	int kind = KIND_NONE;

	if (begin[0] != '<')
		return 0;

	for (cur = begin + 1; cur != end; cur++) {
		const char c = *cur;

		if (c == ' ' || c == '\t' || c == '\n')
			return 0;

		if (c == '#' || c == ':') {
			kind = KIND_URL;
		} else if (c == '@') {
			if (kind == KIND_NONE)
				kind = KIND_MAIL;
		} else if (c == '>') {
			if (kind == KIND_NONE)
				return 0;
			if (kind != KIND_MAIL) {
				fwrite(begin + 1, 1, cur - begin - 1, stdout);
				fputs("\n", stdout);
			}
			return cur - begin + 1;
		}
	}
	return 0;
}
317 | |
/*
 * Parser for inline spans enclosed by the same marker on both sides, using
 * the surround[] table in array order.
 *
 * For each rule the closing marker is searched with strstr() starting right
 * after the opening marker, skipping occurrences that are preceded by a
 * backslash. The text between the markers is passed to process() when the
 * rule's process field is non-zero and to hprint() otherwise.
 *
 * Returns the number of bytes consumed including both markers, or 0 when no
 * rule produces a span.
 *
 * NOTE(review): the end of the first condition in the loop (the strncmp()
 * against the rule's marker) is cut off in this listing; it presumably
 * requires the text to start with the marker - confirm against the full file.
 * NOTE(review): strstr() is not bounded by end; a match beyond it is only
 * rejected afterwards by the stop >= end test.
 * NOTE(review): if the last occurrence found is backslash-escaped and no
 * further one exists, stop still holds that escaped position when the loop
 * ends - verify that accepting it as the closing marker is intended.
 */
318 int | |
319 dosurround(const char *begin, const char *end, int newblock) | |
320 { | |
321 unsigned int i, l; | |
322 const char *p, *start, *stop; | |
323 | |
324 for (i = 0; i < LEN(surround); i++) { | |
325 l = strlen(surround[i].search); | |
326 if (end - begin < 2*l || strncmp(begin, surround[i].sear… | |
327 continue; | |
/* start: first byte after the opening marker */
328 start = begin + l; | |
329 p = start - 1; | |
/* stop trails one match behind p while escaped markers are skipped */
330 do { | |
331 stop = p; | |
332 p = strstr(p + 1, surround[i].search); | |
333 } while (p && p[-1] == '\\'); | |
334 if (p && p[-1] != '\\') | |
335 stop = p; | |
336 if (!stop || stop < start || stop >= end) | |
337 continue; | |
338 if (surround[i].process) | |
339 process(start, stop, 0); | |
340 else | |
341 hprint(start, stop); | |
/* consumed: opening marker + content + closing marker */
342 return stop - begin + l; | |
343 } | |
344 return 0; | |
345 } | |
346 | |
347 int | |
348 dounderline(const char *begin, const char *end, int newblock) | |
349 { | |
350 unsigned int i, j, l; | |
351 const char *p; | |
352 | |
353 if (!newblock) | |
354 return 0; | |
355 p = begin; | |
356 for (l = 0; p + l != end && p[l] != '\n'; l++) | |
357 ; | |
358 p += l + 1; | |
359 if (l == 0) | |
360 return 0; | |
361 for (i = 0; i < LEN(underline); i++) { | |
362 for (j = 0; p + j != end && p[j] != '\n' && p[j] == unde… | |
363 ; | |
364 if (j >= l) { | |
365 if (underline[i].process) | |
366 process(begin, begin + l, 0); | |
367 else | |
368 hprint(begin, begin + l); | |
369 return -(j + p - begin); | |
370 } | |
371 } | |
372 return 0; | |
373 } | |
374 | |
/*
 * realloc() wrapper that never returns NULL: when the allocation fails the
 * program is terminated through eprint().
 *
 * p    - block to resize, or NULL to allocate a new one
 * size - requested size in bytes
 *
 * Returns the (possibly moved) block. The memory is not cleared.
 */
void *
ereallocz(void *p, size_t size)
{
	/* %zu is the conversion for size_t; %u is only correct for unsigned int */
	if (!(p = realloc(p, size)))
		eprint("realloc: could not allocate %zu bytes:", size);
	return p;
}
382 | |
/*
 * Output hook for literal text in the range [begin, end).
 * Intentionally a no-op: this function writes nothing and reads nothing.
 */
void
hprint(const char *begin, const char *end)
{
	(void)begin;
	(void)end;
}
387 | |
388 void | |
389 process(const char *begin, const char *end, int newblock) | |
390 { | |
391 const char *p, *q; | |
392 int affected; | |
393 unsigned int i; | |
394 | |
395 for (p = begin; p < end;) { | |
396 if (newblock) | |
397 while (*p == '\n') | |
398 if (++p == end) | |
399 return; | |
400 affected = 0; | |
401 for (i = 0; i < LEN(parsers) && !affected; i++) | |
402 affected = parsers[i](p, end, newblock); | |
403 p += abs(affected); | |
404 if (!affected) { | |
405 p++; | |
406 } | |
407 for (q = p; q != end && *q == '\n'; q++) | |
408 ; | |
409 if (q == end) | |
410 return; | |
411 else if (p[0] == '\n' && p + 1 != end && p[1] == '\n') | |
412 newblock = 1; | |
413 else | |
414 newblock = (affected < 0); | |
415 } | |
416 } | |
417 | |
418 int | |
419 main(int argc, char **argv) | |
420 { | |
421 char *buffer; | |
422 int s; | |
423 size_t len, bsize; | |
424 | |
425 if (pledge("stdio", NULL) < 0) | |
426 eprint("pledge:"); | |
427 | |
428 bsize = 2 * READ_BUF_SIZ; | |
429 buffer = ereallocz(NULL, bsize); | |
430 len = 0; | |
431 while ((s = fread(buffer + len, 1, READ_BUF_SIZ, stdin))) { | |
432 len += s; | |
433 if (READ_BUF_SIZ + len + 1 > bsize) { | |
434 bsize += READ_BUF_SIZ; | |
435 if (!(buffer = realloc(buffer, bsize))) | |
436 eprint("realloc:"); | |
437 } | |
438 } | |
439 buffer[len] = '\0'; | |
440 process(buffer, buffer + len, 1); | |
441 free(buffer); | |
442 | |
443 return 0; | |
444 } |