Introduction
Introduction Statistics Contact Development Disclaimer Help
kvssachsen2atom - brcon2023-hackathons - Bitreichcon 2023 Hackathon Repository
git clone git://bitreich.org/brcon2023-hackathons git://enlrupgkhuxnvlhsf6lc3fz…
Log
Files
Refs
Tags
---
kvssachsen2atom (3188B)
---
1 #!/usr/bin/env python
2 # coding=utf-8
3 #
4 # Copy me if you can.
5 # by 20h
6 #
7
8 import os
9 import sys
10 import getopt
11
12 from selenium import webdriver
13 from selenium.webdriver.chrome.options import Options as chromeoptions
14 from selenium.webdriver.support.ui import WebDriverWait
15 from selenium.webdriver.support import expected_conditions as EC
16 from selenium.webdriver.common.by import By
17
18 from datetime import datetime
19 import pytz
20
def usage(app):
    """Print a one-line usage synopsis to stderr and terminate.

    Args:
        app: Path or name the program was invoked as; only its basename
            appears in the message.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    app = os.path.basename(app)
    sys.stderr.write("usage: %s [-h] URI\n" % (app))
    sys.exit(1)
25
26 def main(args):
# Load the page at the URI given on the command line in a Chrome instance
# driven by selenium, and print the articles found there to stdout as an
# Atom feed.
#
# args: the full argument vector; args[0] is only used for the usage text.
# Returns 0 once the closing </feed> tag has been printed. Option errors,
# -h and a missing URI all end in usage(), which exits with status 1.
#
# NOTE(review): several lines of this listing are cut off with "…"; the
# comments below describe only the part that is visible.
# NOTE(review): no driver.quit() / driver.close() call is visible anywhere
# in this function, so the browser session is apparently left to process
# exit -- confirm and consider a try/finally.
27 try:
28 opts, largs = getopt.getopt(args[1:], "h")
29 except getopt.GetoptError as err:
30 print(str(err))
31 usage(args[0])
32
# "h" is the only option letter handed to getopt above, so the else branch
# is a guard against options added later without a handler.
33 for o, a in opts:
34 if o == "-h":
35 usage(args[0])
36 else:
37 assert False, "unhandled option"
38
# The URI is a required positional argument; only the first one is used.
39 if len(largs) < 1:
40 usage(args[0])
41
42 link = largs[0]
43
# Switches handed to Chrome one by one through options.add_argument().
44 options = chromeoptions()
45 chromearguments = [
46 "headless",
47 "no-sandbox",
48 "disable-extensions",
49 "disable-dev-shm-usage",
50 "start-maximized",
51 "window-size=1900,1080",
52 "disable-gpu"
53 ]
54 for carg in chromearguments:
55 options.add_argument(carg)
56
57 driver = webdriver.Chrome(options=options)
58 driver.get(link)
59
# Wait (timeout=60) until an element carrying a data-last-letter attribute
# is present in the DOM. The result, isnews, is not referenced again in the
# visible code; the call is used for its waiting effect.
60 isnews = WebDriverWait(driver=driver, timeout=60).until(
61 EC.presence_of_element_located((By.XPATH,
62 "//div[@data-last-letter]")
63 )
64 )
# NOTE(review): the next statement is truncated in this view. newslist is
# later used as a single element (newslist.find_elements below), so the
# hidden tail presumably picks one item from the result -- confirm.
65 newslist = driver.find_elements(By.XPATH, "//div[@data-filter-ta…
66
# NOTE(review): truncated as well; title is read from the page's og:ti…
# <meta> element, the exact expression is not visible here.
67 title = driver.find_elements(By.XPATH, "//meta[@property=\"og:ti…
# The feed subtitle reuses the title.
68 description = title
# globaltags is not referenced again in the visible code.
69 globaltags = ""
70
# Feed header. Title and subtitle are wrapped in CDATA; the URI is
# interpolated as-is into the id and both link elements.
71 print("""<?xml version="1.0" encoding="utf-8"?>""")
72 print("""<feed xmlns="http://www.w3.org/2005/Atom">""")
73 print("\t<title><![CDATA[%s]]></title>" % (title))
74 print("\t<subtitle><![CDATA[%s]]></subtitle>" % (description))
75 print("\t<id>%s</id>" % (link))
76 print("\t<link href=\"%s\" rel=\"self\" />" % (link))
77 print("\t<link href=\"%s\" />" % (link))
78
# Timezone-aware current time in UTC; used as the feed's <updated> value and
# further down as the reference year for article dates.
79 utcnow = datetime.now(pytz.utc)
80 print("\t<updated>%s</updated>" % (utcnow.isoformat()))
81
# Each direct child <div> of newslist is treated as one article.
82 articles = newslist.find_elements(By.XPATH, "./div")
# Split on at most three "/" and drop the last piece: for
# "scheme://host/path" this leaves "scheme://host".
83 baselink = "/".join(link.split("/", 3)[:-1])
# Walk the articles in reverse document order.
84 for article in articles[::-1]:
# From here on `link` no longer holds the URI string but the article's
# first direct child <a> element; baselink was computed before the rebind.
85 link = article.find_elements(By.XPATH, "./a")[0]
86 plink = link.get_attribute("href")
# hrefs that do not start with "http" are prefixed with baselink.
# NOTE(review): an href that already begins with "/" would yield a double
# slash after the host -- verify against real page data.
87 if not plink.startswith("http"):
88 plink = "%s/%s" % (baselink, plink)
89 ptitle = link.get_attribute("data-title")
90 pcontent = article.text
# NOTE(review): this literal looks like an e-mail obfuscation placeholder
# from the page the listing was taken from rather than a real address --
# confirm against the repository.
91 pauthor = "[email protected]"
92
93 # Normalize datetime.
# NOTE(review): the next two statements are truncated; only the start of the
# strptime format ("%d.%m.%…") is visible.
94 updateds = article.find_elements(By.XPATH, ".//time")[0]…
# An article whose date string does not parse is skipped without any
# message.
95 try:
96 dtupdated = datetime.strptime(updateds, "%d.%m.%…
97 except ValueError:
98 continue
99
# Pin the time of day to 12:00:00 and mark the value as UTC.
100 dtupdated = dtupdated.replace(hour=12, minute=0,\
101 second=0, tzinfo=pytz.utc)
# A year later than the current one is replaced by the current year.
102 if dtupdated.year > utcnow.year:
103 dtupdated = dtupdated.replace(year=utcnow.year)
104 pupdated = dtupdated
105
# One <entry> per article; title and content are wrapped in CDATA, the other
# values are interpolated as-is.
106 print("\t<entry>")
107 print("\t\t<id>%s</id>" % (plink))
108 print("\t\t<title><![CDATA[%s]]></title>" % (ptitle))
109 print("\t\t<link href=\"%s\" />" % (plink))
110 print("\t\t<author><name>%s</name></author>" % (pauthor))
111 print("\t\t<updated>%s</updated>" % (pupdated.isoformat(…
112 print("\t\t<content><![CDATA[%s]]></content>" % (pconten…
113 print("\t</entry>")
114
115 print("</feed>")
116
117 return 0
118
# Run main() only when executed as a script and hand its return value to
# sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
121
You are viewing proxied material from bitreich.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.