Introduction
Introduction Statistics Contact Development Disclaimer Help
youtube.c - frontends - front-ends for some sites (experiment)
Log
Files
Refs
README
LICENSE
---
youtube.c (14893B)
---
1 #include <sys/socket.h>
2 #include <sys/types.h>
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <netdb.h>
7 #include <stdarg.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12
13 #include "https.h"
14 #include "json.h"
15 #include "util.h"
16 #include "youtube.h"
17
/*
 * Parse a base-10 integer from the start of `s`.
 * Negative results are clamped to 0; input without leading digits gives 0.
 */
static long long
getnum(const char *s)
{
	long long v = strtoll(s, NULL, 10);

	return v < 0 ? 0 : v;
}
28
/*
 * Issue a request for `path` against the host www.youtube.com.
 * Returns whatever request() returns for that host and path; the third
 * argument is passed as an empty string.
 */
static char *
youtube_request(const char *path)
{
	static const char host[] = "www.youtube.com";

	return request(host, path, "");
}
34
/*
 * Request the watch page for `videoid` ("/watch?v=<videoid>").
 * Returns NULL when the path cannot be formatted or does not fit the
 * buffer, otherwise the result of youtube_request().
 */
static char *
request_video(const char *videoid)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/watch?v=%s", videoid);

	/* n < 0: formatting error, n >= size: the path was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
48
/*
 * Request the videos listing of a channel ("/channel/<channelid>/videos").
 * Returns NULL when the path cannot be formatted or does not fit the
 * buffer, otherwise the result of youtube_request().
 */
static char *
request_channel_videos(const char *channelid)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);

	/* n < 0: formatting error, n >= size: the path was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
62
/*
 * Request the videos listing of a user ("/user/<user>/videos").
 * Returns NULL when the path cannot be formatted or does not fit the
 * buffer, otherwise the result of youtube_request().
 */
static char *
request_user_videos(const char *user)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/user/%s/videos", user);

	/* n < 0: formatting error, n >= size: the path was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
76
/*
 * Request a search results page.
 *
 * s:     search query, inserted into the path as-is (no encoding is done
 *        here).
 * page:  page number as a string; appended as "&page=<page>" when not empty.
 * order: "relevance" or an empty string adds nothing; any other value
 *        appends "&sp=" followed by the code for "date", "views" or
 *        "rating" (an unrecognized value leaves "&sp=" without a code).
 *
 * Returns NULL when the path fills the whole buffer (treated as truncated),
 * otherwise the result of youtube_request().
 */
static char *
request_search(const char *s, const char *page, const char *order)
{
	static const struct {
		const char *name;
		const char *code;
	} orders[] = {
		{ "date",   "CAI%3D" },
		{ "views",  "CAM%3D" },
		{ "rating", "CAE%3D" },
	};
	char path[4096];
	size_t i;

	snprintf(path, sizeof(path), "/results?search_query=%s", s);

	/* NOTE: pagination doesn't work at the moment:
	   this parameter is not supported anymore by Youtube */
	if (page[0] != '\0') {
		strlcat(path, "&page=", sizeof(path));
		strlcat(path, page, sizeof(path));
	}

	if (order[0] != '\0' && strcmp(order, "relevance") != 0) {
		strlcat(path, "&sp=", sizeof(path));
		for (i = 0; i < sizeof(orders) / sizeof(orders[0]); i++) {
			if (strcmp(order, orders[i].name) == 0) {
				strlcat(path, orders[i].code, sizeof(path));
				break;
			}
		}
	}

	/* a completely filled buffer means the path was (or may have been)
	   truncated: refuse to send it */
	if (strlen(path) >= sizeof(path) - 1)
		return NULL;

	return youtube_request(path);
}
107
/*
 * Locate the "ytInitialData" JSON object inside the page text `s`.
 *
 * The object starts right after the first assignment prefix that is found
 * (the window["ytInitialData"] form is tried first, then the
 * "var ytInitialData" form) and ends at the '}' of the first terminator
 * that occurs after it; terminators are tried in the order "};\n",
 * "}; \n", "};<".
 *
 * On success returns 0 with *start at the first byte of the object and
 * *end one past its closing '}'.
 * Returns -1 when no prefix is found (*start is NULL, *end is left
 * untouched) or no terminator is found (*end is NULL).
 */
static int
extractjson_search(const char *s, const char **start, const char **end)
{
	static const char *const prefixes[] = {
		"window[\"ytInitialData\"] = ",
		"var ytInitialData = ",
	};
	static const char *const terminators[] = {
		"};\n",
		"}; \n",
		"};<",
	};
	const char *hit;
	size_t i;

	*start = NULL;
	for (i = 0; !*start && i < sizeof(prefixes) / sizeof(prefixes[0]); i++) {
		hit = strstr(s, prefixes[i]);
		if (hit)
			*start = hit + strlen(prefixes[i]);
	}
	if (!*start)
		return -1;

	*end = NULL;
	for (i = 0; !*end && i < sizeof(terminators) / sizeof(terminators[0]); i++)
		*end = strstr(*start, terminators[i]);
	if (!*end)
		return -1;

	/* step over the closing brace so it is part of the extracted range */
	(*end)++;

	return 0;
}
132
/*
 * Locate the "ytInitialPlayerResponse" JSON object inside the page text `s`.
 *
 * The object starts right after "var ytInitialPlayerResponse = " and ends
 * at the '}' of the first "};<" that follows.
 *
 * On success returns 0 with *start at the first byte of the object and
 * *end one past its closing '}'.
 * Returns -1 when the marker is missing (*start is NULL, *end is left
 * untouched) or the terminator is missing (*end is NULL).
 */
static int
extractjson_video(const char *s, const char **start, const char **end)
{
	static const char marker[] = "var ytInitialPlayerResponse = ";
	const char *begin, *stop;

	begin = strstr(s, marker);
	if (!begin) {
		*start = NULL;
		return -1;
	}
	begin += sizeof(marker) - 1;
	*start = begin;

	stop = strstr(begin, "};<");
	if (!stop) {
		*end = NULL;
		return -1;
	}
	/* include the closing brace in the extracted range */
	*end = stop + 1;

	return 0;
}
147
/* Returns 1 when `name` is exactly "videoRenderer", otherwise 0. */
static int
isrenderername(const char *name)
{
	return strcmp(name, "videoRenderer") == 0;
}
153
154 static void
155 processnode_search(struct json_node *nodes, size_t depth, const char *va…
156 void *pp)
157 {
158 struct search_response *r = (struct search_response *)pp;
159 static struct item *item;
160
161 if (r->nitems > MAX_VIDEOS)
162 return;
163
164 /* new item, structures can be very deep, just check the end for:
165 (items|contents)[].videoRenderer objects */
166 if (depth >= 3 &&
167 nodes[depth - 1].type == JSON_TYPE_OBJECT &&
168 isrenderername(nodes[depth - 1].name)) {
169 r->nitems++;
170 return;
171 }
172
173 if (r->nitems == 0)
174 return;
175 item = &(r->items[r->nitems - 1]);
176
177 if (depth >= 4 &&
178 nodes[depth - 1].type == JSON_TYPE_STRING &&
179 isrenderername(nodes[depth - 2].name) &&
180 !strcmp(nodes[depth - 1].name, "videoId")) {
181 strlcpy(item->id, value, sizeof(item->id));
182 }
183
184 if (depth >= 7 &&
185 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
186 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
187 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
188 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
189 nodes[depth - 1].type == JSON_TYPE_STRING &&
190 isrenderername(nodes[depth - 5].name) &&
191 !strcmp(nodes[depth - 4].name, "title") &&
192 !strcmp(nodes[depth - 3].name, "runs") &&
193 !strcmp(nodes[depth - 1].name, "text") &&
194 !item->title[0]) {
195 strlcpy(item->title, value, sizeof(item->title));
196 }
197
198 /* in search listing there is a short description, string items …
199 if (depth >= 8 &&
200 nodes[depth - 7].type == JSON_TYPE_OBJECT &&
201 nodes[depth - 6].type == JSON_TYPE_ARRAY &&
202 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
203 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
204 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
205 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
206 nodes[depth - 1].type == JSON_TYPE_STRING &&
207 isrenderername(nodes[depth - 7].name) &&
208 !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") &&
209 !strcmp(nodes[depth - 4].name, "snippetText") &&
210 !strcmp(nodes[depth - 3].name, "runs") &&
211 !strcmp(nodes[depth - 1].name, "text")) {
212 strlcat(item->shortdescription, value, sizeof(item->shor…
213 }
214
215 /* in channel/user videos listing there is a short description, …
216 if (depth >= 7 &&
217 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
218 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
219 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
220 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
221 nodes[depth - 1].type == JSON_TYPE_STRING &&
222 isrenderername(nodes[depth - 5].name) &&
223 !strcmp(nodes[depth - 4].name, "descriptionSnippet") &&
224 !strcmp(nodes[depth - 3].name, "runs") &&
225 !strcmp(nodes[depth - 1].name, "text")) {
226 strlcat(item->shortdescription, value, sizeof(item->shor…
227 }
228
229 if (depth >= 5 &&
230 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
231 nodes[depth - 3].type == JSON_TYPE_OBJECT &&
232 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
233 nodes[depth - 1].type == JSON_TYPE_STRING &&
234 isrenderername(nodes[depth - 3].name) &&
235 !strcmp(nodes[depth - 1].name, "simpleText")) {
236 if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
237 !item->viewcount[0]) {
238 strlcpy(item->viewcount, value, sizeof(item->vie…
239 } else if (!strcmp(nodes[depth - 2].name, "lengthText") …
240 !item->duration[0]) {
241 strlcpy(item->duration, value, sizeof(item->dura…
242 } else if (!strcmp(nodes[depth - 2].name, "publishedTime…
243 !item->publishedat[0]) {
244 strlcpy(item->publishedat, value, sizeof(item->p…
245 }
246 }
247
248 if (depth >= 9 &&
249 nodes[depth - 8].type == JSON_TYPE_OBJECT &&
250 nodes[depth - 7].type == JSON_TYPE_OBJECT &&
251 nodes[depth - 6].type == JSON_TYPE_OBJECT &&
252 nodes[depth - 5].type == JSON_TYPE_ARRAY &&
253 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
254 nodes[depth - 3].type == JSON_TYPE_OBJECT &&
255 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
256 nodes[depth - 1].type == JSON_TYPE_STRING &&
257 isrenderername(nodes[depth - 7].name) &&
258 !strcmp(nodes[depth - 6].name, "longBylineText") &&
259 !strcmp(nodes[depth - 5].name, "runs") &&
260 !strcmp(nodes[depth - 3].name, "navigationEndpoint") &&
261 !strcmp(nodes[depth - 2].name, "browseEndpoint")) {
262 if (!strcmp(nodes[depth - 1].name, "browseId")) {
263 strlcpy(item->channelid, value, sizeof(item->cha…
264 }
265 }
266
267 if (depth >= 7 &&
268 nodes[depth - 6].type == JSON_TYPE_OBJECT &&
269 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
270 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
271 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
272 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
273 nodes[depth - 1].type == JSON_TYPE_STRING &&
274 isrenderername(nodes[depth - 5].name) &&
275 !strcmp(nodes[depth - 4].name, "longBylineText") &&
276 !strcmp(nodes[depth - 3].name, "runs")) {
277 if (!strcmp(nodes[depth - 1].name, "text") &&
278 !item->channeltitle[0]) {
279 strlcpy(item->channeltitle, value, sizeof(item->…
280 }
281 }
282 }
283
284 static struct search_response *
285 parse_search_response(const char *data)
286 {
287 struct search_response *r;
288 struct item *item;
289 const char *s, *start, *end;
290 size_t i, len;
291 int ret;
292
293 if (!(s = strstr(data, "\r\n\r\n")))
294 return NULL; /* invalid response */
295 /* skip header */
296 s += strlen("\r\n\r\n");
297
298 if (!(r = calloc(1, sizeof(*r))))
299 return NULL;
300
301 if (extractjson_search(s, &start, &end) == -1) {
302 free(r);
303 return NULL;
304 }
305
306 ret = parsejson(start, end - start, processnode_search, r);
307 if (ret < 0) {
308 free(r);
309 return NULL;
310 }
311
312 /* workaround: sometimes playlists or topics are listed as chann…
313 these topic/playlist links away because they won't work for c…
314 JSON response would have to be parsed in a different way than…
315 for (i = 0; i < r->nitems; i++) {
316 item = &(r->items[i]);
317 len = strlen(item->channeltitle);
318
319 if (len > sizeof(" - Topic") &&
320 !strcmp(item->channeltitle + len - sizeof(" - Topic"…
321 /* reset information that doesn't work for topic…
322 item->channelid[0] = '\0';
323 item->viewcount[0] = '\0';
324 }
325 }
326
327 return r;
328 }
329
330 static void
331 processnode_video(struct json_node *nodes, size_t depth, const char *val…
332 void *pp)
333 {
334 struct video_response *r = (struct video_response *)pp;
335 struct video_format *f;
336
337 if (depth > 1) {
338 if (nodes[0].type == JSON_TYPE_OBJECT &&
339 !strcmp(nodes[1].name, "streamingData")) {
340 if (depth == 2 &&
341 nodes[2].type == JSON_TYPE_STRING &&
342 !strcmp(nodes[2].name, "expiresInSeconds")) {
343 r->expiresinseconds = getnum(value);
344 }
345
346 if (depth >= 3 &&
347 nodes[2].type == JSON_TYPE_ARRAY &&
348 (!strcmp(nodes[2].name, "formats") ||
349 !strcmp(nodes[2].name, "adaptiveFormats"))) {
350 if (r->nformats > MAX_FORMATS)
351 return; /* ignore: don't add too…
352
353 if (depth == 4 && nodes[3].type == JSON_…
354 r->nformats++;
355
356 if (r->nformats == 0)
357 return;
358 f = &(r->formats[r->nformats - 1]); /* c…
359
360 if (depth == 5 &&
361 nodes[2].type == JSON_TYPE_ARRAY &&
362 nodes[3].type == JSON_TYPE_OBJECT &&
363 (nodes[4].type == JSON_TYPE_STRING ||
364 nodes[4].type == JSON_TYPE_NUMBER ||
365 nodes[4].type == JSON_TYPE_BOOL)) {
366 if (!strcmp(nodes[4].name, "widt…
367 f->width = getnum(value);
368 } else if (!strcmp(nodes[4].name…
369 f->height = getnum(value…
370 } else if (!strcmp(nodes[4].name…
371 strlcpy(f->url, value, s…
372 } else if (!strcmp(nodes[4].name…
373 strlcpy(f->signatureciph…
374 } else if (!strcmp(nodes[4].name…
375 strlcpy(f->qualitylabel,…
376 } else if (!strcmp(nodes[4].name…
377 strlcpy(f->quality, valu…
378 } else if (!strcmp(nodes[4].name…
379 f->fps = getnum(value);
380 } else if (!strcmp(nodes[4].name…
381 f->bitrate = getnum(valu…
382 } else if (!strcmp(nodes[4].name…
383 f->averagebitrate = getn…
384 } else if (!strcmp(nodes[4].name…
385 strlcpy(f->mimetype, val…
386 } else if (!strcmp(nodes[4].name…
387 f->itag = getnum(value);
388 } else if (!strcmp(nodes[4].name…
389 f->contentlength = getnu…
390 } else if (!strcmp(nodes[4].name…
391 f->lastmodified = getnum…
392 } else if (!strcmp(nodes[4].name…
393 f->audiochannels = getnu…
394 } else if (!strcmp(nodes[4].name…
395 f->audiosamplerate = get…
396 }
397 }
398 }
399 }
400 }
401
402 if (depth == 4 &&
403 nodes[0].type == JSON_TYPE_OBJECT &&
404 nodes[1].type == JSON_TYPE_OBJECT &&
405 nodes[2].type == JSON_TYPE_OBJECT &&
406 nodes[3].type == JSON_TYPE_STRING &&
407 !strcmp(nodes[1].name, "microformat") &&
408 !strcmp(nodes[2].name, "playerMicroformatRenderer")) {
409 r->isfound = 1;
410
411 if (!strcmp(nodes[3].name, "publishDate")) {
412 strlcpy(r->publishdate, value, sizeof(r->publish…
413 } else if (!strcmp(nodes[3].name, "uploadDate")) {
414 strlcpy(r->uploaddate, value, sizeof(r->uploadda…
415 } else if (!strcmp(nodes[3].name, "category")) {
416 strlcpy(r->category, value, sizeof(r->category));
417 } else if (!strcmp(nodes[3].name, "isFamilySafe")) {
418 r->isfamilysafe = !strcmp(value, "true");
419 } else if (!strcmp(nodes[3].name, "isUnlisted")) {
420 r->isunlisted = !strcmp(value, "true");
421 }
422 }
423
424 if (depth == 3) {
425 if (nodes[0].type == JSON_TYPE_OBJECT &&
426 nodes[2].type == JSON_TYPE_STRING &&
427 !strcmp(nodes[1].name, "videoDetails")) {
428 r->isfound = 1;
429
430 if (!strcmp(nodes[2].name, "title")) {
431 strlcpy(r->title, value, sizeof(r->title…
432 } else if (!strcmp(nodes[2].name, "videoId")) {
433 strlcpy(r->id, value, sizeof(r->id));
434 } else if (!strcmp(nodes[2].name, "lengthSeconds…
435 r->lengthseconds = getnum(value);
436 } else if (!strcmp(nodes[2].name, "author")) {
437 strlcpy(r->author, value, sizeof(r->auth…
438 } else if (!strcmp(nodes[2].name, "viewCount")) {
439 r->viewcount = getnum(value);
440 } else if (!strcmp(nodes[2].name, "channelId")) {
441 strlcpy(r->channelid, value, sizeof(r->c…
442 } else if (!strcmp(nodes[2].name, "shortDescript…
443 strlcpy(r->shortdescription, value, size…
444 }
445 }
446 }
447 }
448
449 static struct video_response *
450 parse_video_response(const char *data)
451 {
452 struct video_response *r;
453 const char *s, *start, *end;
454 int ret;
455
456 if (!(s = strstr(data, "\r\n\r\n")))
457 return NULL; /* invalid response */
458 /* skip header */
459 s += strlen("\r\n\r\n");
460
461 if (!(r = calloc(1, sizeof(*r))))
462 return NULL;
463
464 if (extractjson_video(s, &start, &end) == -1) {
465 free(r);
466 return NULL;
467 }
468
469 ret = parsejson(start, end - start, processnode_video, r);
470 if (ret < 0) {
471 free(r);
472 return NULL;
473 }
474 return r;
475 }
476
/*
 * Search for videos: request the results page and parse it.
 * Returns NULL when the request yields no data, otherwise the result of
 * parse_search_response().
 *
 * NOTE(review): the response buffer is not released here; whether it needs
 * to be freed depends on request(), which is not visible -- confirm.
 */
struct search_response *
youtube_search(const char *rawsearch, const char *page, const char *order)
{
	const char *response = request_search(rawsearch, page, order);

	return response ? parse_search_response(response) : NULL;
}
487
/*
 * List the videos of a channel: request the listing page and parse it.
 * Returns NULL when the request yields no data, otherwise the result of
 * parse_search_response().
 *
 * NOTE(review): the response buffer is not released here; whether it needs
 * to be freed depends on request(), which is not visible -- confirm.
 */
struct search_response *
youtube_channel_videos(const char *channelid)
{
	const char *response = request_channel_videos(channelid);

	return response ? parse_search_response(response) : NULL;
}
498
/*
 * List the videos of a user: request the listing page and parse it.
 * Returns NULL when the request yields no data, otherwise the result of
 * parse_search_response().
 *
 * NOTE(review): the response buffer is not released here; whether it needs
 * to be freed depends on request(), which is not visible -- confirm.
 */
struct search_response *
youtube_user_videos(const char *user)
{
	const char *response = request_user_videos(user);

	return response ? parse_search_response(response) : NULL;
}
509
/*
 * Fetch the details of a video: request the watch page and parse it.
 * Returns NULL when the request yields no data, otherwise the result of
 * parse_video_response().
 *
 * NOTE(review): the response buffer is not released here; whether it needs
 * to be freed depends on request(), which is not visible -- confirm.
 */
struct video_response *
youtube_video(const char *videoid)
{
	const char *response = request_video(videoid);

	return response ? parse_video_response(response) : NULL;
}
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.