Introduction
Introduction Statistics Contact Development Disclaimer Help
smu.c - smu - smu - simple markup (Markdown) processor (fork, fixes + features)
git clone git://git.codemadness.org/smu
Log
Files
Refs
README
LICENSE
---
smu.c (17096B)
---
1 #include <ctype.h>
2 #include <errno.h>
3 #include <stdarg.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #ifdef __OpenBSD__
9 #include <unistd.h>
10 #else
11 #define pledge(p1,p2) 0
12 #endif
13
/* Number of elements in a real array (not a pointer). The expansion is fully
 * parenthesized so it stays correct inside larger expressions such as
 * `n / LENGTH(a)` or `LENGTH(a) * 2`. */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
/*
 * ADDC(b,i): buffer-append helper. Whenever i is a multiple of BUFSIZ the
 * buffer b is grown with realloc() to i + BUFSIZ bytes. Callers in this file
 * use the expansion as an assignment target, e.g. `ADDC(buffer, j) = *p;`.
 * The macro begins with a bare `if`, so it must not be used as the unbraced
 * body of another if/else.
 * NOTE(review): the remainder of this definition is cut off in this listing;
 * confirm how a failed realloc() is handled before relying on it.
 */
15 #define ADDC(b,i) if (i % BUFSIZ == 0) { b = realloc(b, (i + BUFSIZ)); …
16
/*
 * Common signature of all parsers: (begin, end, newblock). The int result is
 * interpreted by process(): 0 means "did not match", any other value is the
 * number of characters handled, negative when the following text starts a
 * new block.
 */
typedef int (*Parser)(const char *, const char *, int);

/*
 * One markup rule. The string members only ever point at string literals in
 * the rule tables, hence const.
 */
typedef struct {
	const char *search;	/* marker text looked for in the input */
	int process;		/* non-zero: content goes through process(); 0: content is only HTML-escaped */
	const char *before;	/* written before the content */
	const char *after;	/* written after the content */
} Tag;
23
/*
 * Forward declarations.
 *
 * Every do*() function is a parser called from process() with the current
 * position `begin`, the end of the range `end`, and a flag telling whether
 * the position starts a new block. Return value as used by process():
 *   0   the parser does not apply at this position;
 *   > 0 that many characters were handled;
 *   < 0 the absolute value was handled and the following text is treated as
 *       the start of a new block.
 * hprint()/hprintattr() write an HTML-escaped copy of [begin, end) to stdout;
 * ereallocz() is a realloc() wrapper that exits on failure.
 */
24 static int doamp(const char *begin, const char *end, int newblock); …
25 static int docomment(const char *begin, const char *end, int newblock); …
26 static int dogtlt(const char *begin, const char *end, int newblock); …
27 static int dohtml(const char *begin, const char *end, int newblock); …
28 static int dolineprefix(const char *begin, const char *end, int newblock…
29 static int dolink(const char *begin, const char *end, int newblock); …
30 static int dolist(const char *begin, const char *end, int newblock); …
31 static int doparagraph(const char *begin, const char *end, int newblock)…
32 static int doreplace(const char *begin, const char *end, int newblock); …
33 static int doshortlink(const char *begin, const char *end, int newblock)…
34 static int dosurround(const char *begin, const char *end, int newblock);…
35 static int dounderline(const char *begin, const char *end, int newblock)…
36 static void *ereallocz(void *p, size_t size);
37 static void hprint(const char *begin, const char *end); …
38 static void hprintattr(const char *begin, const char *end); …
39 static void process(const char *begin, const char *end, int isblock); …
40
/*
 * Parser table. process() calls the entries in array order at every input
 * position and stops at the first one that returns non-zero, so the order of
 * this list decides which rule wins when several could match.
 */
41 /* list of parsers */
42 static Parser parsers[] = { dounderline, docomment, dolineprefix,
43 dolist, doparagraph, dogtlt, dosurround, dol…
44 doshortlink, dohtml, doamp, doreplace };
/*
 * Command-line switches set in main():
 *   lazyimg (-l): dolink() adds loading="lazy" to images that have both a
 *                 width and a height;
 *   nohtml  (-n): the raw-HTML parsers return 0 and process() HTML-escapes
 *                 every character no parser handled.
 */
45 static int lazyimg = 0, nohtml = 0;
46
/*
 * Line-prefix rules used by dolineprefix(). A rule applies at the start of a
 * block or right after a newline when the input begins with `search`.
 * For rules whose `search` ends in a newline (the horizontal rules) only
 * `before` is written. For all others, consecutive lines carrying the prefix
 * are collected with the prefix removed; the collected text is passed to
 * process() when the `process` field is non-zero and merely HTML-escaped
 * when it is 0 (the code-block rules), then `after` is written.
 * NOTE(review): several rows are cut off in this listing, and the first
 * row's search string shows a single space; runs of spaces may have been
 * collapsed by the listing (Markdown code blocks normally need four) -
 * confirm against the upstream file.
 */
47 static Tag lineprefix[] = {
48 { " ", 0, "<pre><code>", "\n</code></pre>" },
49 { "\t", 0, "<pre><code>", "\n</code></pre>…
50 { ">", 2, "<blockquote>", "</blockq…
51 { "###### ", 1, "<h6>", "</h6>" },
52 { "##### ", 1, "<h5>", "</h5>" },
53 { "#### ", 1, "<h4>", "</h4>" },
54 { "### ", 1, "<h3>", "</h3>" },
55 { "## ", 1, "<h2>", "</h2>" },
56 { "# ", 1, "<h1>", "</h1>" …
57 { "- - -\n", 1, "<hr />", ""},
58 { "---\n", 1, "<hr />", ""},
59 };
60
/*
 * Underline-style heading rules used by dounderline(): the text line is
 * wrapped in `before`/`after` when the line below it is made of the rule's
 * `search` character. Both rules have `process` set, so the heading text is
 * run through process().
 * NOTE(review): both rows are cut off in this listing.
 */
61 static Tag underline[] = {
62 { "=", 1, "<h1>", "</h1>\n"…
63 { "-", 1, "<h2>", "</h2>\n"…
64 };
65
/*
 * Inline delimiter rules used by dosurround(). The table is scanned in order
 * and the first rule that matches wins, which is why each longer delimiter
 * is listed before the shorter one it starts with. The code rules have
 * `process` 0: their content is only HTML-escaped, never parsed as markup.
 * NOTE(review): some rows are cut off in this listing.
 */
66 static Tag surround[] = {
67 { "``", 0, "<code>", "</code>" },
68 { "`", 0, "<code>", "</code>" },
69 { "___", 1, "<strong><em>", "</em></strong>…
70 { "***", 1, "<strong><em>", "</em></strong>…
71 { "__", 1, "<strong>", "</strong>" …
72 { "**", 1, "<strong>", "</strong>" …
73 { "_", 1, "<em>", "</em>" },
74 { "*", 1, "<em>", "</em>" },
75 };
76
/*
 * Backslash escapes handled by doreplace(): when the input starts with the
 * first string of a row, the second string is written instead and the length
 * of the first string is reported as consumed. This lets a markup character
 * be written literally.
 */
77 static const char *replace[][2] = {
78 { "\\\\", "\\" },
79 { "\\`", "`" },
80 { "\\*", "*" },
81 { "\\_", "_" },
82 { "\\{", "{" },
83 { "\\}", "}" },
84 { "\\[", "[" },
85 { "\\]", "]" },
86 { "\\(", "(" },
87 { "\\)", ")" },
88 { "\\#", "#" },
89 { "\\+", "+" },
90 { "\\-", "-" },
91 { "\\.", "." },
92 { "\\!", "!" },
93 };
94
/*
 * Insertions handled by doreplace(): when the input starts with the first
 * string, the second string is written to the output; doreplace() does not
 * return at that point, so the matched input itself is not skipped there.
 * NOTE(review): the pattern shows one space before the newline; a Markdown
 * hard line break is normally two spaces, and this listing appears to
 * collapse runs of spaces - confirm against the upstream file.
 */
95 static const char *insert[][2] = {
96 { " \n", "<br />" },
97 };
98
/*
 * Report a fatal condition: format the message printf-style onto stderr,
 * then terminate the program with exit status 1. Does not return.
 */
void
eprint(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	exit(1);
}
109
/*
 * Parser for a bare '&'.
 *
 * Returns 0 when `begin` is not at '&'. When raw HTML is allowed (nohtml is
 * 0) the text after the '&' is scanned up to the first ';', space,
 * backslash, newline or tab; if the scan stops on ';' or reaches `end`, the
 * '&' is taken to start an entity and 0 is returned so it is left as is.
 * In every other case, and always when nohtml is set, "&amp;" is written
 * and 1 is returned (one input character handled).
 * `newblock` is not used.
 * NOTE(review): the for-loop header is cut off in this listing; the
 * increment expression is not visible.
 */
110 int
111 doamp(const char *begin, const char *end, int newblock)
112 {
113 const char *p;
114
115 if (*begin != '&')
116 return 0;
117 if (!nohtml) {
118 for (p = begin + 1; p != end && !strchr("; \\\n\t", *p);…
119 ;
120 if (p == end || *p == ';')
121 return 0;
122 }
123 fputs("&amp;", stdout);
124 return 1;
125 }
126
/*
 * Parser that escapes a '<' or '>' which cannot belong to an HTML tag.
 *
 * Returns 0 immediately when nohtml is set or fewer than two characters
 * remain. Two characters are inspected: `brpos` is true when the second one
 * is '>'. If it is not, the first character must be '<' or the parser does
 * not apply. `c` is the neighbouring character: the one before the '>' when
 * brpos is true, otherwise the one after the '<'.
 *   - '<' followed by something that is not an ASCII letter: writes "&lt;"
 *     and returns 1.
 *   - a non-letter followed by '>': writes that character followed by
 *     "&gt;" and returns 2.
 *   - anything else: returns 0.
 * `newblock` is not used.
 * NOTE(review): the condition of the second branch is cut off in this
 * listing, so it may contain further tests - confirm against upstream.
 */
127 int
128 dogtlt(const char *begin, const char *end, int newblock)
129 {
130 int brpos;
131 char c;
132
133 if (nohtml || begin + 1 >= end)
134 return 0;
135 brpos = begin[1] == '>';
136 if (!brpos && *begin != '<')
137 return 0;
138 c = begin[brpos ? 0 : 1];
139 if (!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
140 fputs("&lt;", stdout);
141 return 1;
142 } else if (brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')…
143 fprintf(stdout, "%c&gt;",c);
144 return 2;
145 }
146 return 0;
147 }
148
149 int
150 docomment(const char *begin, const char *end, int newblock)
151 {
152 char *p;
153
154 if (nohtml || strncmp("<!--", begin, 4))
155 return 0;
156 p = strstr(begin, "-->");
157 if (!p || p + 3 >= end)
158 return 0;
159 fprintf(stdout, "%.*s\n", (int)(p + 3 - begin), begin);
160 return (p + 3 - begin) * (newblock ? -1 : 1);
161 }
162
/*
 * Parser that passes raw HTML elements through unchanged.
 *
 * Applies only when nohtml is 0, more than two characters remain and the
 * input starts with '<' followed by a letter. The tag name is the run of
 * alphanumeric characters after the '<'.
 * Each "</" found before `end` is then compared with the tag name; on a
 * match the text from `begin` through the closing tag is written with
 * fwrite() and its length is returned.
 * If no matching closing tag is found but a '>' exists after the tag name,
 * the text from `begin` up to and including the byte that follows that '>'
 * (p - begin + 2 bytes) is written and that count is returned.
 * Otherwise 0 is returned. `newblock` is not used.
 *
 * NOTE(review): two lines of the closing-tag branch are cut off in this
 * listing.
 * NOTE(review): the tag-name loop reads *p before testing p < end, and the
 * strstr()/strchr() searches are not limited to `end`; the '>' fallback can
 * therefore write and report bytes beyond the range - confirm and bound.
 */
163 int
164 dohtml(const char *begin, const char *end, int newblock)
165 {
166 const char *p, *tag, *tagend;
167
168 if (nohtml || begin + 2 >= end)
169 return 0;
170 p = begin;
171 if (p[0] != '<' || !isalpha((unsigned char)p[1]))
172 return 0;
173 p++;
174 tag = p;
175 for (; isalnum((unsigned char)*p) && p < end; p++)
176 ;
177 tagend = p;
178 if (p > end || tag == tagend)
179 return 0;
180 while ((p = strstr(p, "</")) && p < end) {
181 p += 2;
182 if (strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag…
183 p++;
184 fwrite(begin, 1, p - begin + tagend - tag + 1, s…
185 return p - begin + tagend - tag + 1;
186 }
187 }
188 if ((p = strchr(tagend, '>'))) {
189 fwrite(begin, 1, p - begin + 2, stdout);
190 return p - begin + 2;
191 } else {
192 return 0;
193 }
194 }
195
/*
 * Parser for line-prefix markup (code blocks, blockquotes, "#" headings and
 * horizontal rules) as described by the lineprefix[] table.
 *
 * Applies at the start of a block, or when `begin` is a newline (matching
 * then starts at the character after it); otherwise returns 0. The first
 * table row whose `search` matches is used:
 *   - a newline is echoed first when `begin` was a newline, then the row's
 *     `before` string is written;
 *   - a row whose `search` ends in '\n' writes a newline and returns l - 1
 *     (the prefix length without its newline);
 *   - otherwise the lines that carry the prefix are copied into a heap
 *     buffer with the prefix removed (for blockquotes one optional space
 *     after '>' is dropped as well), trailing newlines are trimmed, and the
 *     buffer is either passed to process() (row's `process` non-zero) or
 *     written HTML-escaped with hprint(). `after` is written with puts(),
 *     which appends a newline, and the buffer is freed.
 * Returns the negated number of input characters handled, so process()
 * starts a new block afterwards; 0 when no row matches.
 *
 * NOTE(review): several lines are cut off in this listing (the collection
 * loop condition and the last argument of the process() call).
 * NOTE(review): when nothing was collected (j == 0) the trailing-newline
 * trim reads buffer[j - 1], i.e. before the start of the buffer - confirm
 * and guard.
 * NOTE(review): `end - p < l` compares a signed difference with an unsigned
 * length.
 */
196 int
197 dolineprefix(const char *begin, const char *end, int newblock)
198 {
199 unsigned int i, j, l;
200 char *buffer;
201 const char *p;
202
203 if (newblock)
204 p = begin;
205 else if (*begin == '\n')
206 p = begin + 1;
207 else
208 return 0;
209 for (i = 0; i < LENGTH(lineprefix); i++) {
210 l = strlen(lineprefix[i].search);
211 if (end - p < l)
212 continue;
213 if (strncmp(lineprefix[i].search, p, l))
214 continue;
215 if (*begin == '\n')
216 putc('\n', stdout);
217 fputs(lineprefix[i].before, stdout);
218 if (lineprefix[i].search[l-1] == '\n') {
219 putc('\n', stdout);
220 return l - 1;
221 }
222 if (!(buffer = malloc(BUFSIZ)))
223 eprint("malloc");
224 buffer[0] = '\0';
225
226 /* Collect lines into buffer while they start with the p…
227 j = 0;
228 while ((strncmp(lineprefix[i].search, p, l) == 0) && p +…
229 p += l;
230
231 /* Special case for blockquotes: optional space …
232 if (lineprefix[i].search[0] == '>' && *p == ' ')…
233 p++;
234 }
235
236 while (p < end) {
237 ADDC(buffer, j) = *p;
238 j++;
239 if (*(p++) == '\n')
240 break;
241 }
242 }
243
244 /* Skip empty lines in block */
245 while (*(buffer + j - 1) == '\n')
246 j--;
247
248 ADDC(buffer, j) = '\0';
249 if (lineprefix[i].process)
250 process(buffer, buffer + strlen(buffer), linepre…
251 else
252 hprint(buffer, buffer + strlen(buffer));
253 puts(lineprefix[i].after);
254 free(buffer);
255 return -(p - begin);
256 }
257 return 0;
258 }
259
/*
 * Parser for inline links "[text](target)" and images "![alt](target)".
 *
 * Returns 0 unless `begin` is at '[' or "![". The description ends at a
 * "](" sequence; occurrences of "![" inside the description move the end to
 * a later "](" so that an image may be nested in a link description. The
 * target ends at the ')' that balances the opening '(', counting nested
 * parentheses.
 *
 * Target handling:
 *   - a target wrapped in '<' ... '>' has those two characters stripped;
 *   - otherwise leading whitespace is skipped and the target is scanned:
 *       '=' preceded by whitespace (images only) ends the target and starts
 *       the dimensions, parsed with strtol() as WIDTH and, after an 'x',
 *       HEIGHT;
 *       a single or double quote preceded by whitespace ends the target and
 *       starts the title, which runs to the matching quote;
 *     trailing whitespace is then trimmed from the target.
 *
 * Output:
 *   - image: <img src="..." alt="..." [title="..."] [width="..."]
 *     [height="..."] [loading="lazy"] />; width and height are written only
 *     when greater than 0, loading="lazy" only when both are and the lazyimg
 *     option is set;
 *   - link: <a href="..." [title="..."]>, the description run through
 *     process(), then </a>.
 *   Attribute values go through hprintattr().
 *
 * Returns the number of characters from `begin` through the closing ')'.
 * `newblock` is not used.
 *
 * NOTE(review): several lines are cut off in this listing (the nested-image
 * loop header, the height/title handling and the trailing-space trim).
 * NOTE(review): strstr()/strpbrk()/strchr() are not limited to `end`; the
 * results are only compared against `end` afterwards.
 */
260 int
261 dolink(const char *begin, const char *end, int newblock)
262 {
263 long width = 0, height = 0;
264 int img, len, parens_depth = 1;
265 char *numend;
266 const char *desc, *link, *p, *q, *descend, *linkend;
267 const char *title = NULL, *titleend = NULL;
268
269 if (*begin == '[')
270 img = 0;
271 else if (strncmp(begin, "![", 2) == 0)
272 img = 1;
273 else
274 return 0;
275 p = desc = begin + 1 + img;
276 if (!(p = strstr(desc, "](")) || p > end)
277 return 0;
278 for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q…
279 if (!(p = strstr(p + 1, "](")) || p > end)
280 return 0;
281 descend = p;
282 link = p + 2;
283
284 /* find end of link while handling nested parens */
285 q = link;
286 while (parens_depth) {
287 if (!(q = strpbrk(q, "()")) || q > end)
288 return 0;
289 if (*q == '(')
290 parens_depth++;
291 else
292 parens_depth--;
293 if (parens_depth && q < end)
294 q++;
295 }
296
297 linkend = q;
298 if (*link == '<' && *(linkend - 1) == '>') {
299 link++;
300 linkend--;
301 } else {
302 /* trim leading spaces */
303 for (p = link; p < q && isspace((unsigned char)*p); p++)
304 ;
305
306 for (link = p; p < q; p++) {
307 if (*p == '=' && img && p != link &&
308 isspace((unsigned char)p[-1])) {
309 /* image dimensions */
310 linkend = p;
311 width = strtol(++p, &numend, 10);
312 p = numend;
313 if (*numend == 'x')
314 height = strtol(++p, &numend, 10…
315 } else if ((*p == '\'' || *p == '"') && p != lin…
316 isspace((unsigned char)p[-1])) {
317 /* title attribute: for links and images…
318 linkend = p;
319 title = ++p;
320 if ((titleend = strchr(title, *(p - 1)))…
321 if (titleend >= q)
322 titleend = q;
323 else
324 p = titleend;
325 }
326 }
327 }
328
329 /* trim trailing spaces from link */
330 for (; linkend > link && isspace((unsigned char)linkend[…
331 ;
332 }
333
334 len = q + 1 - begin;
335 if (img) {
336 fputs("<img src=\"", stdout);
337 hprintattr(link, linkend);
338 fputs("\" alt=\"", stdout);
339 hprintattr(desc, descend);
340 fputs("\" ", stdout);
341 if (title && titleend && title != titleend) {
342 fputs("title=\"", stdout);
343 hprintattr(title, titleend);
344 fputs("\" ", stdout);
345 }
346 if (width > 0)
347 printf("width=\"%ld\" ", width);
348 if (height > 0)
349 printf("height=\"%ld\" ", height);
350 if (width > 0 && height > 0 && lazyimg)
351 fputs("loading=\"lazy\" ", stdout);
352 fputs("/>", stdout);
353 } else {
354 fputs("<a href=\"", stdout);
355 hprintattr(link, linkend);
356 fputs("\"", stdout);
357 if (title && titleend && title != titleend) {
358 fputs(" title=\"", stdout);
359 hprintattr(title, titleend);
360 fputs("\"", stdout);
361 }
362 fputs(">", stdout);
363 process(desc, descend, 0);
364 fputs("</a>", stdout);
365 }
366 return len;
367 }
368
/*
 * Parser for ordered and unordered lists.
 *
 * Applies at the start of a block, or when `begin` is a newline (matching
 * then starts at the following character); otherwise returns 0.
 * An unordered list starts with '-', '*' or '+', and that character is
 * remembered as the list's marker. An ordered list starts with a run of
 * digits followed by '.'. In both cases the marker must be followed by a
 * space or tab. `indent` is the distance from the start of the marker to
 * the first character of the item text.
 *
 * After writing "<ul>" or "<ol>" (preceded by a newline when not at a new
 * block), the outer loop produces one "<li>" per iteration: the inner loop
 * copies the item text into `buffer`, examining what follows each newline
 * to decide whether the item continues, a new item starts, or the list ends
 * (`run` becomes 0). A blank line inside the list raises `isblock`, which
 * influences the block flag passed to process() for the item text.
 * Finally the closing tag is written, the buffer is freed, and `p` is moved
 * back over trailing newlines.
 *
 * Returns the negated number of characters handled, so process() starts a
 * new block afterwards.
 *
 * NOTE(review): several lines of the continuation logic and the process()
 * call are cut off in this listing; the description above is limited to
 * what is visible.
 * NOTE(review): the look-ahead reads (*q, q[j]) are not all checked against
 * `end` in the visible code - confirm the bounds.
 */
369 int
370 dolist(const char *begin, const char *end, int newblock)
371 {
372 unsigned int i, j, indent, run, ul, isblock;
373 const char *p, *q;
374 char *buffer = NULL;
375 char marker;
376
377 isblock = 0;
378 if (newblock)
379 p = begin;
380 else if (*begin == '\n')
381 p = begin + 1;
382 else
383 return 0;
384 q = p;
385 if (*p == '-' || *p == '*' || *p == '+') {
386 ul = 1;
387 marker = *p;
388 } else {
389 ul = 0;
390 for (; p < end && *p >= '0' && *p <= '9'; p++)
391 ;
392 if (p >= end || *p != '.')
393 return 0;
394 }
395 p++;
396 if (p >= end || !(*p == ' ' || *p == '\t'))
397 return 0;
398 for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
399 ;
400 indent = p - q;
401 buffer = ereallocz(buffer, BUFSIZ);
402 if (!newblock)
403 putc('\n', stdout);
404 fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
405 run = 1;
406 for (; p < end && run; p++) {
407 for (i = 0; p < end && run; p++, i++) {
408 if (*p == '\n') {
409 if (p + 1 == end) {
410 break;
411 } else {
412 /* Handle empty lines */
413 for (q = p + 1; (*q == ' ' || *q…
414 ;
415 if (*q == '\n') {
416 ADDC(buffer, i) = '\n';
417 i++;
418 run = 0;
419 isblock++;
420 p = q;
421 }
422 }
423 q = p + 1;
424 j = 0;
425 if (ul && *q == marker) {
426 j = 1;
427 } else if (!ul) {
428 for (; q + j != end && q[j] >= '…
429 ;
430 if (q + j == end)
431 break;
432 if (j > 0 && q[j] == '.')
433 j++;
434 else
435 j = 0;
436 }
437 if (q + indent < end)
438 for (; (q[j] == ' ' || q[j] == '…
439 ;
440 if (j == indent) {
441 ADDC(buffer, i) = '\n';
442 i++;
443 p += indent;
444 run = 1;
445 if (*q == ' ' || *q == '\t')
446 p++;
447 else
448 break;
449 } else if (j < indent) {
450 run = 0;
451 }
452 }
453 ADDC(buffer, i) = *p;
454 }
455 ADDC(buffer, i) = '\0';
456 fputs("<li>", stdout);
457 process(buffer, buffer + i, isblock > 1 || (isblock == 1…
458 fputs("</li>\n", stdout);
459 }
460 fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
461 free(buffer);
462 p--;
463 while (*(--p) == '\n')
464 ;
465
466 return -(p - begin + 1);
467 }
468
/*
 * Parser that wraps a block of text in <p>...</p>.
 *
 * Only acts at the start of a new block. The paragraph extends to the first
 * blank line ("\n\n") or, when there is none inside the range, to `end`.
 * Paragraphs of at most one character are ignored. The text is handed to
 * process() as inline content.
 *
 * Returns 0 when nothing was produced, otherwise the negated paragraph
 * length so that process() starts a new block afterwards.
 */
int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *stop;

	if (newblock == 0)
		return 0;

	stop = strstr(begin, "\n\n");
	if (stop == NULL || stop > end)
		stop = end;

	if (stop - begin < 2)
		return 0;

	fputs("<p>", stdout);
	process(begin, stop, 0);
	fputs("</p>\n", stdout);

	return begin - stop;
}
487
/*
 * Parser for the insert[] and replace[] tables.
 *
 * First every insert[] row is compared with the input at `begin`; for a
 * matching row its second string is written. This step does not return, so
 * it does not by itself report any input as handled.
 * Then the replace[] rows are tried in order: rows longer than the remaining
 * input are skipped, and for the first row whose first string matches, the
 * second string is written and the length of the first string is returned.
 * Returns 0 when no replace[] row matches. `newblock` is not used.
 *
 * NOTE(review): the comparison in the insert[] loop is cut off in this
 * listing, and unlike the replace[] loop it has no visible check that the
 * pattern fits before `end`.
 * NOTE(review): `end - begin < l` compares a signed difference with an
 * unsigned length.
 */
488 int
489 doreplace(const char *begin, const char *end, int newblock)
490 {
491 unsigned int i, l;
492
493 for (i = 0; i < LENGTH(insert); i++)
494 if (strncmp(insert[i][0], begin, strlen(insert[i][0])) =…
495 fputs(insert[i][1], stdout);
496 for (i = 0; i < LENGTH(replace); i++) {
497 l = strlen(replace[i][0]);
498 if (end - begin < l)
499 continue;
500 if (strncmp(replace[i][0], begin, l) == 0) {
501 fputs(replace[i][1], stdout);
502 return l;
503 }
504 }
505 return 0;
506 }
507
/*
 * Write the bytes of [begin, end) for an obfuscated mail address: ASCII
 * bytes become decimal character references, bytes >= 0x80 are copied as
 * they are so multi-byte encoded characters stay intact.
 */
static void
shortlink_obfuscate(const char *begin, const char *end)
{
	const char *c;
	unsigned char u;

	for (c = begin; c != end; c++) {
		/*
		 * Go through unsigned char: where plain char is signed, a
		 * byte >= 0x80 is negative and "%u" would print it as a huge
		 * number, producing an invalid character reference.
		 */
		u = (unsigned char)*c;
		if (u < 0x80)
			fprintf(stdout, "&#%u;", (unsigned int)u);
		else
			putc(u, stdout);
	}
}

/*
 * Parser for automatic links: "<url>" and "<user@host>".
 *
 * The text between '<' and the first '>' must not contain a space, tab or
 * newline. It is a URL when it contains '#' or ':' and a mail address when
 * it contains '@' but neither of those; anything else is not a shortlink.
 * URLs are written as <a href="URL">URL</a> with HTML escaping. Mail
 * addresses get a "mailto:" href and are written as character references
 * to make harvesting harder.
 *
 * Returns 0 when the input is not a shortlink, otherwise the number of
 * characters handled including both angle brackets. `newblock` is unused.
 */
int
doshortlink(const char *begin, const char *end, int newblock)
{
	const char *p;
	int ismail = 0;

	if (*begin != '<')
		return 0;
	for (p = begin + 1; p != end; p++) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if (ismail == 0)
				ismail = 1;
			break;
		case '>':
			if (ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if (ismail == 1) {
				/* mailto: */
				fputs("&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:", stdout);
				/* p is the first '>' after begin, so it bounds the address */
				shortlink_obfuscate(begin + 1, p);
				fputs("\">", stdout);
				shortlink_obfuscate(begin + 1, p);
			} else {
				hprintattr(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}
553
/*
 * Parser for inline markup enclosed in a pair of identical delimiters
 * (code spans, emphasis, strong), as listed in surround[].
 *
 * The rows of surround[] are tried in order. A row is skipped when the
 * remaining input is shorter than two delimiters or does not start with the
 * delimiter. The closing delimiter is the next occurrence of the same
 * string that is not directly preceded by a backslash; the row is skipped
 * when none is found before `end`.
 * For a match, `before` is written, then the enclosed text - passed to
 * process() when the row's `process` is non-zero, otherwise written
 * HTML-escaped with hprint() - then `after`. When the enclosed text both
 * starts and ends with a space, one space is dropped on each side; `l` is
 * incremented in that case so the return value still covers the full input.
 *
 * Returns the number of characters handled including both delimiters, or 0
 * when no row matches. `newblock` is not used.
 *
 * NOTE(review): the line holding the length/prefix test is cut off in this
 * listing.
 * NOTE(review): strstr() is not limited to `end`; a hit beyond the range is
 * rejected afterwards by the `stop >= end` test.
 */
554 int
555 dosurround(const char *begin, const char *end, int newblock)
556 {
557 unsigned int i, l;
558 const char *p, *start, *stop;
559
560 for (i = 0; i < LENGTH(surround); i++) {
561 l = strlen(surround[i].search);
562 if (end - begin < 2*l || strncmp(begin, surround[i].sear…
563 continue;
564 start = begin + l;
565 p = start - 1;
566 do {
567 stop = p;
568 p = strstr(p + 1, surround[i].search);
569 } while (p && p[-1] == '\\');
570 if (p && p[-1] != '\\')
571 stop = p;
572 if (!stop || stop < start || stop >= end)
573 continue;
574 fputs(surround[i].before, stdout);
575
576 /* Single space at start and end are ignored */
577 if (*start == ' ' && *(stop - 1) == ' ') {
578 start++;
579 stop--;
580 l++;
581 }
582
583 if (surround[i].process)
584 process(start, stop, 0);
585 else
586 hprint(start, stop);
587 fputs(surround[i].after, stdout);
588 return stop - begin + l;
589 }
590 return 0;
591 }
592
/*
 * Parser for headings marked by underlining the text on the next line,
 * using the rules in underline[].
 *
 * Only acts at the start of a new block. `l` is the length of the first
 * line and `p` is moved to the start of the line after it; an empty first
 * line returns 0. For each rule, `j` counts characters on the second line
 * up to `end` or a newline. The rule applies when j equals l, or when the
 * counted run is followed by a newline and is longer than 3.
 * On a match the first line is written between the rule's `before` and
 * `after`, through process() when the rule's `process` is non-zero and
 * HTML-escaped with hprint() otherwise.
 *
 * Returns the negated number of characters handled (both lines up to the
 * end of the underline), or 0 when no rule matches.
 *
 * NOTE(review): the counting-loop condition is cut off in this listing.
 * NOTE(review): when the first line runs to `end` (no newline), `p` is moved
 * to end + 1 and the `p + j != end` test can no longer stop the scan -
 * confirm and bound.
 */
593 int
594 dounderline(const char *begin, const char *end, int newblock)
595 {
596 unsigned int i, j, l;
597 const char *p;
598
599 if (!newblock)
600 return 0;
601 p = begin;
602 for (l = 0; p + l != end && p[l] != '\n'; l++)
603 ;
604 p += l + 1;
605 if (l == 0)
606 return 0;
607 for (i = 0; i < LENGTH(underline); i++) {
608 for (j = 0; p + j != end && p[j] != '\n' && p[j] == unde…
609 ;
610 if (j == l || (p[j] == '\n' && j > 3)) {
611 fputs(underline[i].before, stdout);
612 if (underline[i].process)
613 process(begin, begin + l, 0);
614 else
615 hprint(begin, begin + l);
616 fputs(underline[i].after, stdout);
617 return -(j + p - begin);
618 }
619 }
620 return 0;
621 }
622
/*
 * realloc() that does not return on failure: when the allocation cannot be
 * satisfied, the requested size is reported through eprint(), which ends
 * the program. Returns the (possibly moved) block.
 */
void *
ereallocz(void *p, size_t size)
{
	void *grown = realloc(p, size);

	if (grown == NULL)
		eprint("realloc: %zu bytes\n", size);

	return grown;
}
633
/*
 * Write [begin, end) to stdout, escaped for use inside a double-quoted HTML
 * attribute: '&', '"', '>' and '<' become entities, all other bytes are
 * copied unchanged.
 */
void
hprintattr(const char *begin, const char *end)
{
	const char *cur = begin;

	while (cur != end) {
		switch (*cur) {
		case '&':
			fputs("&amp;", stdout);
			break;
		case '"':
			fputs("&quot;", stdout);
			break;
		case '>':
			fputs("&gt;", stdout);
			break;
		case '<':
			fputs("&lt;", stdout);
			break;
		default:
			putc(*cur, stdout);
			break;
		}
		cur++;
	}
}
652
/*
 * Write [begin, end) to stdout, escaped for use as HTML text: '&', '>' and
 * '<' become entities, all other bytes are copied unchanged.
 */
void
hprint(const char *begin, const char *end)
{
	const char *cur;
	const char *entity;

	for (cur = begin; cur != end; cur++) {
		entity = NULL;
		if (*cur == '&')
			entity = "&amp;";
		else if (*cur == '>')
			entity = "&gt;";
		else if (*cur == '<')
			entity = "&lt;";

		if (entity != NULL)
			fputs(entity, stdout);
		else
			putc(*cur, stdout);
	}
}
669
670 void
671 process(const char *begin, const char *end, int newblock)
672 {
673 const char *p, *q;
674 int affected;
675 unsigned int i;
676
677 for (p = begin; p < end;) {
678 if (newblock)
679 while (*p == '\n')
680 if (++p == end)
681 return;
682 affected = 0;
683 for (i = 0; i < LENGTH(parsers) && !affected; i++)
684 affected = parsers[i](p, end, newblock);
685 p += abs(affected);
686 if (!affected) {
687 if (nohtml)
688 hprint(p, p + 1);
689 else
690 putc(*p, stdout);
691 p++;
692 }
693 for (q = p; q != end && *q == '\n'; q++)
694 ;
695 if (q == end)
696 return;
697 else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
698 newblock = 1;
699 else
700 newblock = affected < 0;
701 }
702 }
703
/*
 * Print the command-line synopsis, using argv[0] as the program name, and
 * terminate through eprint().
 */
void
usage(char **argv)
{
	const char *progname = argv[0];

	eprint("usage: %s [-l] [-n] [file]\n", progname);
}
709
710 int
711 main(int argc, char *argv[])
712 {
713 FILE *source = stdin;
714 char *buffer = NULL;
715 int s, i;
716 unsigned long len, bsize;
717
718 for (i = 1; i < argc; i++) {
719 if (!strcmp("-v", argv[i])) {
720 eprint("smu v%s\n", VERSION);
721 } else if (!strcmp("-n", argv[i])) {
722 nohtml = 1;
723 } else if (!strcmp("-l", argv[i])) {
724 lazyimg = 1;
725 } else if (argv[i][0] != '-') {
726 break; /* file specified */
727 } else if (!strcmp("--", argv[i])) {
728 i++;
729 break;
730 } else {
731 usage(argv);
732 }
733 }
734 if (i < argc && !(source = fopen(argv[i], "r")))
735 eprint("fopen: %s: %s\n", argv[i], strerror(errno));
736
737 if (pledge("stdio", NULL) == -1)
738 eprint("pledge");
739
740 bsize = 2 * BUFSIZ;
741 buffer = ereallocz(buffer, bsize);
742 len = 0;
743 while ((s = fread(buffer + len, 1, BUFSIZ, source))) {
744 len += s;
745 if (BUFSIZ + len + 1 > bsize) {
746 bsize += BUFSIZ;
747 if (!(buffer = realloc(buffer, bsize)))
748 eprint("realloc");
749 }
750 }
751 buffer[len] = '\0';
752 process(buffer, buffer + len, 1);
753 free(buffer);
754 if (source != stdin)
755 fclose(source);
756
757 return 0;
758 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.