Introduction
Introduction Statistics Contact Development Disclaimer Help
Add example selenium script for the atom hackathon. - brcon2023-hackathons - Bi…
git clone git://bitreich.org/brcon2023-hackathons git://enlrupgkhuxnvlhsf6lc3fz…
Log
Files
Refs
Tags
---
commit a7cd0c547c792f74b7784cc0a8c806380a28ca2f
parent 2922c09dc4919dcea4ac331bbaa4e373ba4ccc4a
Author: Christoph Lohmann <[email protected]>
Date: Thu, 10 Aug 2023 16:10:01 +0200
Add example selenium script for the atom hackathon.
Diffstat:
A sfeed-atom/kvssachsen2atom | 121 +++++++++++++++++++++++++++++…
1 file changed, 121 insertions(+), 0 deletions(-)
---
diff --git a/sfeed-atom/kvssachsen2atom b/sfeed-atom/kvssachsen2atom
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copy me if you can.
+# by 20h
+#
+
+import os
+import sys
+import getopt
+
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options as chromeoptions
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.by import By
+
+from datetime import datetime
+import pytz
+
+def usage(app):  # Write a one-line usage message to stderr, then exit with status 1.
+ app = os.path.basename(app)  # show only the program's file name, not its full path
+ sys.stderr.write("usage: %s [-h] URI\n" % (app))
+ sys.exit(1)  # raises SystemExit, so control does not return to the caller
+
+def main(args):  # Load the URI given in args with headless Chrome and print an Atom feed to stdout; returns 0.
+ try:
+ opts, largs = getopt.getopt(args[1:], "h")  # only -h is accepted; largs holds the positional arguments
+ except getopt.GetoptError as err:
+ print(str(err))  # NOTE(review): this goes to stdout, while the usage text goes to stderr
+ usage(args[0])  # usage() exits the process
+
+ for o, a in opts:
+ if o == "-h":
+ usage(args[0])
+ else:
+ assert False, "unhandled option"
+
+ if len(largs) < 1:  # the URI argument is mandatory
+ usage(args[0])
+
+ link = largs[0]  # page to load; also printed as the feed's <id> and <link>
+
+ options = chromeoptions()
+ chromearguments = [  # command-line switches passed to Chrome one by one below
+ "headless",
+ "no-sandbox",
+ "disable-extensions",
+ "disable-dev-shm-usage",
+ "start-maximized",
+ "window-size=1900,1080",
+ "disable-gpu"
+ ]
+ for carg in chromearguments:
+ options.add_argument(carg)
+
+ driver = webdriver.Chrome(options=options)  # NOTE(review): no driver.quit() is visible in this function
+ driver.get(link)
+
+ isnews = WebDriverWait(driver=driver, timeout=60).until(  # block until the element below is present (timeout=60); isnews is not used afterwards
+ EC.presence_of_element_located((By.XPATH,
+ "//div[@data-last-letter]")
+ )
+ )
+ newslist = driver.find_elements(By.XPATH, "//div[@data-filter-target=\…
+
+ title = driver.find_elements(By.XPATH, "//meta[@property=\"og:title\"]…
+ description = title  # the subtitle reuses the title value
+ globaltags = ""  # not referenced again in this function
+
+ print("""<?xml version="1.0" encoding="utf-8"?>""")
+ print("""<feed xmlns="http://www.w3.org/2005/Atom">""")
+ print("\t<title><![CDATA[%s]]></title>" % (title))
+ print("\t<subtitle><![CDATA[%s]]></subtitle>" % (description))
+ print("\t<id>%s</id>" % (link))
+ print("\t<link href=\"%s\" rel=\"self\" />" % (link))
+ print("\t<link href=\"%s\" />" % (link))
+
+ utcnow = datetime.now(pytz.utc)  # timezone-aware current time; used for the feed's <updated> and the year clamp below
+ print("\t<updated>%s</updated>" % (utcnow.isoformat()))
+
+ articles = newslist.find_elements(By.XPATH, "./div")  # NOTE(review): newslist is assigned from find_elements() on a truncated line above; confirm it is a single element, not a list
+ baselink = "/".join(link.split("/", 3)[:-1])  # e.g. "https://host/a/b" -> "https://host"
+ for article in articles[::-1]:  # walk the articles in reverse of the order they were found
+ link = article.find_elements(By.XPATH, "./a")[0]  # first <a> child; rebinds link (baselink was already derived from the URI)
+ plink = link.get_attribute("href")
+ if not plink.startswith("http"):  # prefix hrefs that do not start with "http" with the site base
+ plink = "%s/%s" % (baselink, plink)
+ ptitle = link.get_attribute("data-title")
+ pcontent = article.text
+ pauthor = "[email protected]"
+
+ # Normalize datetime.
+ updateds = article.find_elements(By.XPATH, ".//time")[0].text  # text of the first <time> descendant
+ try:
+ dtupdated = datetime.strptime(updateds, "%d.%m.%Y")  # expects day.month.year
+ except ValueError:
+ continue  # skip articles whose date text does not parse
+
+ dtupdated = dtupdated.replace(hour=12, minute=0,\
+ second=0, tzinfo=pytz.utc)  # pin the time of day to 12:00:00 UTC
+ if dtupdated.year > utcnow.year:  # clamp a date from a future year to the current year
+ dtupdated = dtupdated.replace(year=utcnow.year)
+ pupdated = dtupdated
+
+ print("\t<entry>")
+ print("\t\t<id>%s</id>" % (plink))
+ print("\t\t<title><![CDATA[%s]]></title>" % (ptitle))
+ print("\t\t<link href=\"%s\" />" % (plink))
+ print("\t\t<author><name>%s</name></author>" % (pauthor))
+ print("\t\t<updated>%s</updated>" % (pupdated.isoformat()))
+ print("\t\t<content><![CDATA[%s]]></content>" % (pcontent))
+ print("\t</entry>")
+
+ print("</feed>")
+
+ return 0
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+ sys.exit(main(sys.argv))  # main()'s return value becomes the process exit status
+
You are viewing proxied material from bitreich.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.