util.c - tscrape - twitter scraper (not working anymore) | |
git clone git://git.codemadness.org/tscrape | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
util.c (2290B) | |
--- | |
1 #include <sys/types.h> | |
2 | |
3 #include <ctype.h> | |
4 #include <errno.h> | |
5 #include <stdio.h> | |
6 #include <stdlib.h> | |
7 #include <string.h> | |
8 #include <time.h> | |
9 #include <wchar.h> | |
10 | |
11 #include "util.h" | |
12 | |
13 /* Read a field-separated line from 'fp', | |
14 * separated by a character 'separator', | |
15 * 'fields' is a list of pointers with a size of FieldLast (must be >0). | |
16 * 'line' buffer is allocated using malloc, 'size' will contain the allo… | |
17 * buffer size. | |
18 * returns: amount of fields read (>0) or -1 on error. */ | |
19 size_t | |
20 parseline(char *line, char *fields[FieldLast]) | |
21 { | |
22 char *prev, *s; | |
23 size_t i; | |
24 | |
25 for (prev = line, i = 0; | |
26 (s = strchr(prev, '\t')) && i < FieldLast - 1; | |
27 i++) { | |
28 *s = '\0'; | |
29 fields[i] = prev; | |
30 prev = s + 1; | |
31 } | |
32 fields[i++] = prev; | |
33 /* make non-parsed fields empty. */ | |
34 for (; i < FieldLast; i++) | |
35 fields[i] = ""; | |
36 | |
37 return i; | |
38 } | |
39 | |
/* Convert the decimal string 's' to a time_t and store it in '*t'.
 * The whole string must be a valid integer: an empty string, trailing
 * characters (such as a fractional part) or a value outside the range of
 * long long are rejected. 't' may be NULL to only validate the input.
 * Assumes time_t is signed. errno is reset to 0 before the conversion.
 * returns: 0 on success, -1 on error ('*t' is left untouched). */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long val;

	errno = 0;
	val = strtoll(s, &end, 10);

	/* out of range (or any other conversion error) */
	if (errno != 0)
		return -1;
	/* nothing to parse, or garbage after the number */
	if (s[0] == '\0' || end[0] != '\0')
		return -1;

	/* NOTE: assumes time_t is 64-bit on 64-bit platforms:
	         long long (atleast 32-bit) to time_t. */
	if (t != NULL)
		*t = (time_t)val;

	return 0;
}
58 | |
/* Write the string 's' to 'fp', escaping the characters that are special
 * in HTML 2.0 / XML 1.0 so the output is safe in both element content and
 * quoted attribute values:
 *   <  becomes &lt;     >  becomes &gt;     &  becomes &amp;
 *   "  becomes &quot;   '  becomes &#39;
 * The numeric reference is used for the apostrophe because &apos; is not
 * defined in HTML 2.0. All other bytes are written unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; s++) {
		switch(*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   fputc(*s, fp);
		}
	}
}
74 | |
/* Print exactly `len' columns of characters to `fp'. If the string `s' is
 * shorter, pad the rest with the character `pad'. If it is longer, cut it
 * off and print an ellipsis (U+2026, counted as one column) in its place.
 * Bytes below 32 and characters for which wcwidth() reports no width are
 * skipped. Bytes from 127 up are decoded with mbtowc(), which depends on
 * the LC_CTYPE locale in effect; printing of `s' stops at the first
 * sequence that fails to decode and the remaining columns are padded.
 * Nothing is printed when `len' is 0. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
	wchar_t wc;
	size_t col = 0, i, slen;
	int rl, w;

	if (!len)
		return;

	slen = strlen(s);
	for (i = 0; i < slen; i += rl) {
		/* default: a single byte that is one column wide */
		rl = w = 1;
		if ((unsigned char)s[i] < 32)
			continue; /* control character: not printed */
		if ((unsigned char)s[i] >= 127) {
			/* decode one character from at most 4 bytes, without
			 * reading past the end of the string */
			if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4)) <= 0)
				break;
			if ((w = wcwidth(wc)) == -1)
				continue; /* no width: not printed */
		}
		/* the character does not fit, or it would fill the last
		 * column while more text follows: truncate with an ellipsis */
		if (col + w > len || (col + w == len && s[i + rl])) {
			fputs("\xe2\x80\xa6", fp);
			col++;
			break;
		}
		fwrite(&s[i], 1, rl, fp);
		col += w;
	}
	for (; col < len; ++col)
		putc(pad, fp);
}