youtube.c - frontends - front-ends for some sites (experiment) | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
youtube.c (14893B) | |
--- | |
1 #include <sys/socket.h> | |
2 #include <sys/types.h> | |
3 | |
4 #include <ctype.h> | |
5 #include <errno.h> | |
6 #include <netdb.h> | |
7 #include <stdarg.h> | |
8 #include <stdio.h> | |
9 #include <stdlib.h> | |
10 #include <string.h> | |
11 #include <unistd.h> | |
12 | |
13 #include "https.h" | |
14 #include "json.h" | |
15 #include "util.h" | |
16 #include "youtube.h" | |
17 | |
/* Parse s as a base-10 integer; negative results are clamped to zero. */
static long long
getnum(const char *s)
{
	long long parsed = strtoll(s, NULL, 10);

	return parsed < 0 ? 0 : parsed;
}
28 | |
/* Fetch path from www.youtube.com through request(), passing an empty third
   argument, and hand back whatever request() returns. */
static char *
youtube_request(const char *path)
{
	char *response;

	response = request("www.youtube.com", path, "");

	return response;
}
34 | |
/* Request the watch page of videoid.
   Returns the result of youtube_request(), or NULL when the path could not be
   formatted or does not fit in the local buffer. */
static char *
request_video(const char *videoid)
{
	char url[2048];
	int n = snprintf(url, sizeof(url), "/watch?v=%s", videoid);

	/* only continue when the whole path was written (no error, no truncation) */
	if (n >= 0 && (size_t)n < sizeof(url))
		return youtube_request(url);

	return NULL;
}
48 | |
/* Request the videos listing of the channel channelid.
   Returns the result of youtube_request(), or NULL when the path could not be
   formatted or does not fit in the local buffer. */
static char *
request_channel_videos(const char *channelid)
{
	char buf[2048];
	int len;

	len = snprintf(buf, sizeof(buf), "/channel/%s/videos", channelid);
	if (len < 0)
		return NULL; /* formatting failed */
	if ((size_t)len >= sizeof(buf))
		return NULL; /* request is too long (truncation) */

	return youtube_request(buf);
}
62 | |
/* Request the videos listing of the user named user.
   Returns the result of youtube_request(), or NULL when the path could not be
   formatted or does not fit in the local buffer. */
static char *
request_user_videos(const char *user)
{
	char target[2048];
	int written;
	int fits;

	written = snprintf(target, sizeof(target), "/user/%s/videos", user);
	/* a request that is too long would have been truncated */
	fits = written >= 0 && (size_t)written < sizeof(target);

	return fits ? youtube_request(target) : NULL;
}
76 | |
/* Request a search results page.
 *
 * s     - search query; it is inserted into the path as-is, no escaping is
 *         done here.
 * page  - page number as text; appended as "&page=" only when non-empty.
 * order - sort order; an empty string or "relevance" adds nothing, any other
 *         value appends "&sp=" followed by the code for "date", "views" or
 *         "rating" (nothing after "&sp=" for other values).
 *
 * Returns the result of youtube_request(), or NULL when the path could not be
 * formatted or does not fit in the local buffer.
 */
static char *
request_search(const char *s, const char *page, const char *order)
{
	char path[4096];
	const char *sp;
	int r;

	r = snprintf(path, sizeof(path), "/results?search_query=%s", s);
	/* check for an output error or a request that is too long (truncation) */
	if (r < 0 || (size_t)r >= sizeof(path))
		return NULL;

	/* NOTE: pagination doesn't work at the moment:
	   this parameter is not supported anymore by Youtube */
	if (page[0]) {
		/* strlcat() returns the length it tried to create: a value of
		   sizeof(path) or more means the result was truncated */
		if (strlcat(path, "&page=", sizeof(path)) >= sizeof(path) ||
		    strlcat(path, page, sizeof(path)) >= sizeof(path))
			return NULL;
	}

	if (order[0] && strcmp(order, "relevance")) {
		if (!strcmp(order, "date"))
			sp = "CAI%3D";
		else if (!strcmp(order, "views"))
			sp = "CAM%3D";
		else if (!strcmp(order, "rating"))
			sp = "CAE%3D";
		else
			sp = ""; /* unknown order: only "&sp=" is appended */

		if (strlcat(path, "&sp=", sizeof(path)) >= sizeof(path) ||
		    strlcat(path, sp, sizeof(path)) >= sizeof(path))
			return NULL;
	}

	return youtube_request(path);
}
107 | |
/* Locate the ytInitialData JSON object inside the page text s.
   On success returns 0 with *start at the first byte after the assignment
   marker and *end one past the closing '}' of the object.
   Returns -1 when no marker or no terminator is found; the pointer that could
   not be located is then left NULL. */
static int
extractjson_search(const char *s, const char **start, const char **end)
{
	/* both lists are tried in order, the first hit wins */
	static const char *const markers[] = {
		"window[\"ytInitialData\"] = ",
		"var ytInitialData = ",
	};
	static const char *const terminators[] = {
		"};\n",
		"}; \n",
		"};<",
	};
	const char *p = NULL;
	size_t i;

	for (i = 0; i < sizeof(markers) / sizeof(markers[0]) && p == NULL; i++) {
		p = strstr(s, markers[i]);
		if (p != NULL)
			p += strlen(markers[i]);
	}
	*start = p;
	if (p == NULL)
		return -1;

	p = NULL;
	for (i = 0; i < sizeof(terminators) / sizeof(terminators[0]) && p == NULL; i++)
		p = strstr(*start, terminators[i]);
	*end = p;
	if (p == NULL)
		return -1;

	/* step over the '}' so that it is part of the extracted range */
	*end = p + 1;

	return 0;
}
132 | |
/* Locate the ytInitialPlayerResponse JSON object inside the page text s.
   On success returns 0 with *start at the first byte after the assignment
   marker and *end one past the closing '}' of the object.
   Returns -1 when the marker or the "};<" terminator is not found. */
static int
extractjson_video(const char *s, const char **start, const char **end)
{
	static const char marker[] = "var ytInitialPlayerResponse = ";
	const char *p;

	p = strstr(s, marker);
	*start = p;
	if (p == NULL)
		return -1;
	*start = p + (sizeof(marker) - 1);

	p = strstr(*start, "};<");
	*end = p;
	if (p == NULL)
		return -1;

	/* step over the '}' so that it is part of the extracted range */
	*end = p + 1;

	return 0;
}
147 | |
/* Returns 1 when name is exactly "videoRenderer", otherwise 0. */
static int
isrenderername(const char *name)
{
	return strcmp(name, "videoRenderer") == 0;
}
153 | |
/*
 * Node callback handed to parsejson() by parse_search_response(); pp is the
 * struct search_response that is being filled in.
 *
 * The deepest node of the current path is nodes[depth - 1]. An object named
 * "videoRenderer" (with depth >= 3) starts a new item by incrementing
 * r->nitems. Every other call compares the tail of the path against a set of
 * known shapes and stores value in the current item, r->items[nitems - 1]:
 *   videoRenderer.videoId                         -> id
 *   videoRenderer.title.runs[].text               -> title (first match only)
 *   videoRenderer.detailedMetadataSnippets[].snippetText.runs[].text
 *                                                 -> appended to shortdescription
 *   videoRenderer.descriptionSnippet.runs[].text  -> appended to shortdescription
 *   videoRenderer.viewCountText.simpleText        -> viewcount (first match only)
 *   videoRenderer.lengthText.simpleText           -> duration (first match only)
 *   videoRenderer.longBylineText.runs[].navigationEndpoint.browseEndpoint.browseId
 *                                                 -> channelid
 *   videoRenderer.longBylineText.runs[].text      -> channeltitle (first match only)
 * A third simpleText key (its name starts with "publishedTime") fills
 * publishedat, also first match only.
 *
 * NOTE(review): the guard below only stops once nitems exceeds MAX_VIDEOS, so
 * nitems can end up as MAX_VIDEOS + 1; confirm that items[] is declared with
 * room for that many entries.
 */
154 static void | |
155 processnode_search(struct json_node *nodes, size_t depth, const char *va… | |
156 void *pp) | |
157 { | |
158 struct search_response *r = (struct search_response *)pp; | |
159 static struct item *item; | |
160 | |
161 if (r->nitems > MAX_VIDEOS) | |
162 return; | |
163 | |
164 /* new item, structures can be very deep, just check the end for: | |
165 (items|contents)[].videoRenderer objects */ | |
166 if (depth >= 3 && | |
167 nodes[depth - 1].type == JSON_TYPE_OBJECT && | |
168 isrenderername(nodes[depth - 1].name)) { | |
169 r->nitems++; | |
170 return; | |
171 } | |
172 | |
/* nothing to fill in until the first videoRenderer object has been seen */
173 if (r->nitems == 0) | |
174 return; | |
/* all matches below write to the most recently started item */
175 item = &(r->items[r->nitems - 1]); | |
176 | |
177 if (depth >= 4 && | |
178 nodes[depth - 1].type == JSON_TYPE_STRING && | |
179 isrenderername(nodes[depth - 2].name) && | |
180 !strcmp(nodes[depth - 1].name, "videoId")) { | |
181 strlcpy(item->id, value, sizeof(item->id)); | |
182 } | |
183 | |
/* the title check on item->title[0] keeps only the first text run */
184 if (depth >= 7 && | |
185 nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
186 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
187 nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
188 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
189 nodes[depth - 1].type == JSON_TYPE_STRING && | |
190 isrenderername(nodes[depth - 5].name) && | |
191 !strcmp(nodes[depth - 4].name, "title") && | |
192 !strcmp(nodes[depth - 3].name, "runs") && | |
193 !strcmp(nodes[depth - 1].name, "text") && | |
194 !item->title[0]) { | |
195 strlcpy(item->title, value, sizeof(item->title)); | |
196 } | |
197 | |
198 /* in search listing there is a short description, string items … | |
199 if (depth >= 8 && | |
200 nodes[depth - 7].type == JSON_TYPE_OBJECT && | |
201 nodes[depth - 6].type == JSON_TYPE_ARRAY && | |
202 nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
203 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
204 nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
205 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
206 nodes[depth - 1].type == JSON_TYPE_STRING && | |
207 isrenderername(nodes[depth - 7].name) && | |
208 !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") && | |
209 !strcmp(nodes[depth - 4].name, "snippetText") && | |
210 !strcmp(nodes[depth - 3].name, "runs") && | |
211 !strcmp(nodes[depth - 1].name, "text")) { | |
212 strlcat(item->shortdescription, value, sizeof(item->shor… | |
213 } | |
214 | |
215 /* in channel/user videos listing there is a short description, … | |
216 if (depth >= 7 && | |
217 nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
218 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
219 nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
220 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
221 nodes[depth - 1].type == JSON_TYPE_STRING && | |
222 isrenderername(nodes[depth - 5].name) && | |
223 !strcmp(nodes[depth - 4].name, "descriptionSnippet") && | |
224 !strcmp(nodes[depth - 3].name, "runs") && | |
225 !strcmp(nodes[depth - 1].name, "text")) { | |
226 strlcat(item->shortdescription, value, sizeof(item->shor… | |
227 } | |
228 | |
229 if (depth >= 5 && | |
230 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
231 nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
232 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
233 nodes[depth - 1].type == JSON_TYPE_STRING && | |
234 isrenderername(nodes[depth - 3].name) && | |
235 !strcmp(nodes[depth - 1].name, "simpleText")) { | |
236 if (!strcmp(nodes[depth - 2].name, "viewCountText") && | |
237 !item->viewcount[0]) { | |
238 strlcpy(item->viewcount, value, sizeof(item->vie… | |
239 } else if (!strcmp(nodes[depth - 2].name, "lengthText") … | |
240 !item->duration[0]) { | |
241 strlcpy(item->duration, value, sizeof(item->dura… | |
242 } else if (!strcmp(nodes[depth - 2].name, "publishedTime… | |
243 !item->publishedat[0]) { | |
244 strlcpy(item->publishedat, value, sizeof(item->p… | |
245 } | |
246 } | |
247 | |
248 if (depth >= 9 && | |
249 nodes[depth - 8].type == JSON_TYPE_OBJECT && | |
250 nodes[depth - 7].type == JSON_TYPE_OBJECT && | |
251 nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
252 nodes[depth - 5].type == JSON_TYPE_ARRAY && | |
253 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
254 nodes[depth - 3].type == JSON_TYPE_OBJECT && | |
255 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
256 nodes[depth - 1].type == JSON_TYPE_STRING && | |
257 isrenderername(nodes[depth - 7].name) && | |
258 !strcmp(nodes[depth - 6].name, "longBylineText") && | |
259 !strcmp(nodes[depth - 5].name, "runs") && | |
260 !strcmp(nodes[depth - 3].name, "navigationEndpoint") && | |
261 !strcmp(nodes[depth - 2].name, "browseEndpoint")) { | |
262 if (!strcmp(nodes[depth - 1].name, "browseId")) { | |
263 strlcpy(item->channelid, value, sizeof(item->cha… | |
264 } | |
265 } | |
266 | |
267 if (depth >= 7 && | |
268 nodes[depth - 6].type == JSON_TYPE_OBJECT && | |
269 nodes[depth - 5].type == JSON_TYPE_OBJECT && | |
270 nodes[depth - 4].type == JSON_TYPE_OBJECT && | |
271 nodes[depth - 3].type == JSON_TYPE_ARRAY && | |
272 nodes[depth - 2].type == JSON_TYPE_OBJECT && | |
273 nodes[depth - 1].type == JSON_TYPE_STRING && | |
274 isrenderername(nodes[depth - 5].name) && | |
275 !strcmp(nodes[depth - 4].name, "longBylineText") && | |
276 !strcmp(nodes[depth - 3].name, "runs")) { | |
277 if (!strcmp(nodes[depth - 1].name, "text") && | |
278 !item->channeltitle[0]) { | |
279 strlcpy(item->channeltitle, value, sizeof(item->… | |
280 } | |
281 } | |
282 } | |
283 | |
284 static struct search_response * | |
285 parse_search_response(const char *data) | |
286 { | |
287 struct search_response *r; | |
288 struct item *item; | |
289 const char *s, *start, *end; | |
290 size_t i, len; | |
291 int ret; | |
292 | |
293 if (!(s = strstr(data, "\r\n\r\n"))) | |
294 return NULL; /* invalid response */ | |
295 /* skip header */ | |
296 s += strlen("\r\n\r\n"); | |
297 | |
298 if (!(r = calloc(1, sizeof(*r)))) | |
299 return NULL; | |
300 | |
301 if (extractjson_search(s, &start, &end) == -1) { | |
302 free(r); | |
303 return NULL; | |
304 } | |
305 | |
306 ret = parsejson(start, end - start, processnode_search, r); | |
307 if (ret < 0) { | |
308 free(r); | |
309 return NULL; | |
310 } | |
311 | |
312 /* workaround: sometimes playlists or topics are listed as chann… | |
313 these topic/playlist links away because they won't work for c… | |
314 JSON response would have to be parsed in a different way than… | |
315 for (i = 0; i < r->nitems; i++) { | |
316 item = &(r->items[i]); | |
317 len = strlen(item->channeltitle); | |
318 | |
319 if (len > sizeof(" - Topic") && | |
320 !strcmp(item->channeltitle + len - sizeof(" - Topic"… | |
321 /* reset information that doesn't work for topic… | |
322 item->channelid[0] = '\0'; | |
323 item->viewcount[0] = '\0'; | |
324 } | |
325 } | |
326 | |
327 return r; | |
328 } | |
329 | |
/*
 * Node callback handed to parsejson() by parse_video_response(); pp is the
 * struct video_response that is being filled in.
 *
 * Three parts of the document are read, addressed by absolute position in
 * nodes[]:
 *  - "streamingData": expiresInSeconds, and for the "formats" and
 *    "adaptiveFormats" arrays one struct video_format per entry; numeric
 *    fields go through getnum(), text fields are copied with strlcpy().
 *  - "microformat" -> "playerMicroformatRenderer": publishDate, uploadDate,
 *    category, isFamilySafe and isUnlisted.
 *  - "videoDetails": title, videoId, length in seconds, author, viewCount,
 *    channelId and a short description.
 * r->isfound is set as soon as a string node below either of the last two
 * is seen.
 *
 * NOTE(review): format handling only stops once nformats exceeds MAX_FORMATS,
 * so nformats can end up as MAX_FORMATS + 1; confirm that formats[] is
 * declared with room for that many entries.
 */
330 static void | |
331 processnode_video(struct json_node *nodes, size_t depth, const char *val… | |
332 void *pp) | |
333 { | |
334 struct video_response *r = (struct video_response *)pp; | |
335 struct video_format *f; | |
336 | |
337 if (depth > 1) { | |
338 if (nodes[0].type == JSON_TYPE_OBJECT && | |
339 !strcmp(nodes[1].name, "streamingData")) { | |
/* NOTE(review): the other branches pair depth == N with nodes[N - 1]; here
   depth == 2 is paired with nodes[2] -- confirm whether depth == 3 was meant */
340 if (depth == 2 && | |
341 nodes[2].type == JSON_TYPE_STRING && | |
342 !strcmp(nodes[2].name, "expiresInSeconds")) { | |
343 r->expiresinseconds = getnum(value); | |
344 } | |
345 | |
346 if (depth >= 3 && | |
347 nodes[2].type == JSON_TYPE_ARRAY && | |
348 (!strcmp(nodes[2].name, "formats") || | |
349 !strcmp(nodes[2].name, "adaptiveFormats"))) { | |
350 if (r->nformats > MAX_FORMATS) | |
351 return; /* ignore: don't add too… | |
352 | |
353 if (depth == 4 && nodes[3].type == JSON_… | |
354 r->nformats++; | |
355 | |
356 if (r->nformats == 0) | |
357 return; | |
358 f = &(r->formats[r->nformats - 1]); /* c… | |
359 | |
360 if (depth == 5 && | |
361 nodes[2].type == JSON_TYPE_ARRAY && | |
362 nodes[3].type == JSON_TYPE_OBJECT && | |
363 (nodes[4].type == JSON_TYPE_STRING || | |
364 nodes[4].type == JSON_TYPE_NUMBER || | |
365 nodes[4].type == JSON_TYPE_BOOL)) { | |
366 if (!strcmp(nodes[4].name, "widt… | |
367 f->width = getnum(value); | |
368 } else if (!strcmp(nodes[4].name… | |
369 f->height = getnum(value… | |
370 } else if (!strcmp(nodes[4].name… | |
371 strlcpy(f->url, value, s… | |
372 } else if (!strcmp(nodes[4].name… | |
373 strlcpy(f->signatureciph… | |
374 } else if (!strcmp(nodes[4].name… | |
375 strlcpy(f->qualitylabel,… | |
376 } else if (!strcmp(nodes[4].name… | |
377 strlcpy(f->quality, valu… | |
378 } else if (!strcmp(nodes[4].name… | |
379 f->fps = getnum(value); | |
380 } else if (!strcmp(nodes[4].name… | |
381 f->bitrate = getnum(valu… | |
382 } else if (!strcmp(nodes[4].name… | |
383 f->averagebitrate = getn… | |
384 } else if (!strcmp(nodes[4].name… | |
385 strlcpy(f->mimetype, val… | |
386 } else if (!strcmp(nodes[4].name… | |
387 f->itag = getnum(value); | |
388 } else if (!strcmp(nodes[4].name… | |
389 f->contentlength = getnu… | |
390 } else if (!strcmp(nodes[4].name… | |
391 f->lastmodified = getnum… | |
392 } else if (!strcmp(nodes[4].name… | |
393 f->audiochannels = getnu… | |
394 } else if (!strcmp(nodes[4].name… | |
395 f->audiosamplerate = get… | |
396 } | |
397 } | |
398 } | |
399 } | |
400 } | |
401 | |
402 if (depth == 4 && | |
403 nodes[0].type == JSON_TYPE_OBJECT && | |
404 nodes[1].type == JSON_TYPE_OBJECT && | |
405 nodes[2].type == JSON_TYPE_OBJECT && | |
406 nodes[3].type == JSON_TYPE_STRING && | |
407 !strcmp(nodes[1].name, "microformat") && | |
408 !strcmp(nodes[2].name, "playerMicroformatRenderer")) { | |
409 r->isfound = 1; | |
410 | |
411 if (!strcmp(nodes[3].name, "publishDate")) { | |
412 strlcpy(r->publishdate, value, sizeof(r->publish… | |
413 } else if (!strcmp(nodes[3].name, "uploadDate")) { | |
414 strlcpy(r->uploaddate, value, sizeof(r->uploadda… | |
415 } else if (!strcmp(nodes[3].name, "category")) { | |
416 strlcpy(r->category, value, sizeof(r->category)); | |
417 } else if (!strcmp(nodes[3].name, "isFamilySafe")) { | |
418 r->isfamilysafe = !strcmp(value, "true"); | |
419 } else if (!strcmp(nodes[3].name, "isUnlisted")) { | |
420 r->isunlisted = !strcmp(value, "true"); | |
421 } | |
422 } | |
423 | |
424 if (depth == 3) { | |
425 if (nodes[0].type == JSON_TYPE_OBJECT && | |
426 nodes[2].type == JSON_TYPE_STRING && | |
427 !strcmp(nodes[1].name, "videoDetails")) { | |
428 r->isfound = 1; | |
429 | |
430 if (!strcmp(nodes[2].name, "title")) { | |
431 strlcpy(r->title, value, sizeof(r->title… | |
432 } else if (!strcmp(nodes[2].name, "videoId")) { | |
433 strlcpy(r->id, value, sizeof(r->id)); | |
434 } else if (!strcmp(nodes[2].name, "lengthSeconds… | |
435 r->lengthseconds = getnum(value); | |
436 } else if (!strcmp(nodes[2].name, "author")) { | |
437 strlcpy(r->author, value, sizeof(r->auth… | |
438 } else if (!strcmp(nodes[2].name, "viewCount")) { | |
439 r->viewcount = getnum(value); | |
440 } else if (!strcmp(nodes[2].name, "channelId")) { | |
441 strlcpy(r->channelid, value, sizeof(r->c… | |
442 } else if (!strcmp(nodes[2].name, "shortDescript… | |
443 strlcpy(r->shortdescription, value, size… | |
444 } | |
445 } | |
446 } | |
447 } | |
448 | |
449 static struct video_response * | |
450 parse_video_response(const char *data) | |
451 { | |
452 struct video_response *r; | |
453 const char *s, *start, *end; | |
454 int ret; | |
455 | |
456 if (!(s = strstr(data, "\r\n\r\n"))) | |
457 return NULL; /* invalid response */ | |
458 /* skip header */ | |
459 s += strlen("\r\n\r\n"); | |
460 | |
461 if (!(r = calloc(1, sizeof(*r)))) | |
462 return NULL; | |
463 | |
464 if (extractjson_video(s, &start, &end) == -1) { | |
465 free(r); | |
466 return NULL; | |
467 } | |
468 | |
469 ret = parsejson(start, end - start, processnode_video, r); | |
470 if (ret < 0) { | |
471 free(r); | |
472 return NULL; | |
473 } | |
474 return r; | |
475 } | |
476 | |
/* Run a search request for rawsearch with the given page and order and parse
   the response. Returns NULL when the request or the parsing fails. */
struct search_response *
youtube_search(const char *rawsearch, const char *page, const char *order)
{
	const char *response = request_search(rawsearch, page, order);

	return response ? parse_search_response(response) : NULL;
}
487 | |
/* Request the videos listing of channelid and parse the response.
   Returns NULL when the request or the parsing fails. */
struct search_response *
youtube_channel_videos(const char *channelid)
{
	const char *response;

	response = request_channel_videos(channelid);
	if (response == NULL)
		return NULL;

	return parse_search_response(response);
}
498 | |
/* Request the videos listing of user and parse the response.
   Returns NULL when the request or the parsing fails. */
struct search_response *
youtube_user_videos(const char *user)
{
	const char *response = request_user_videos(user);

	if (response != NULL)
		return parse_search_response(response);

	return NULL;
}
509 | |
/* Request the watch page of videoid and parse the response.
   Returns NULL when the request or the parsing fails. */
struct video_response *
youtube_video(const char *videoid)
{
	const char *response;

	response = request_video(videoid);

	return response != NULL ? parse_video_response(response) : NULL;
}