GopherProxy

	Add example selenium script for the atom hackathon. - brcon2023-hackathons - Bi…
	git clone git://bitreich.org/brcon2023-hackathons git://enlrupgkhuxnvlhsf6lc3fz…
	Log
	Files
	Refs
	Tags
	---
	commit a7cd0c547c792f74b7784cc0a8c806380a28ca2f
	parent 2922c09dc4919dcea4ac331bbaa4e373ba4ccc4a
	Author: Christoph Lohmann <[email protected]>
	Date: Thu, 10 Aug 2023 16:10:01 +0200

	Add example selenium script for the atom hackathon.

	Diffstat:
	A sfeed-atom/kvssachsen2atom | 121 +++++++++++++++++++++++++++++…

	1 file changed, 121 insertions(+), 0 deletions(-)
	---
	diff --git a/sfeed-atom/kvssachsen2atom b/sfeed-atom/kvssachsen2atom
	@@ -0,0 +1,121 @@
	+#!/usr/bin/env python
	+# coding=utf-8
	+#
	+# Copy me if you can.
	+# by 20h
	+#
	+
	+import os
	+import sys
	+import getopt
	+
	+from selenium import webdriver
	+from selenium.webdriver.chrome.options import Options as chromeoptions
	+from selenium.webdriver.support.ui import WebDriverWait
	+from selenium.webdriver.support import expected_conditions as EC
	+from selenium.webdriver.common.by import By
	+
	+from datetime import datetime
	+import pytz
	+
	+def usage(app):
	+ app = os.path.basename(app)
	+ sys.stderr.write("usage: %s [-h] URI\n" % (app))
	+ sys.exit(1)
	+
	+def main(args):
	+ """Load the page at URI in headless Chrome and print an Atom feed.
	+
	+ args is the full argument vector (args[0] is the program name). The
	+ only option accepted is -h; the first positional argument is the
	+ URI to load. The feed is written to stdout with print().
	+
	+ Returns 0 after the closing </feed> tag has been printed. On a bad
	+ option, -h, or a missing URI, usage() is called, which exits the
	+ process with status 1.
	+ """
	+ # Parse the command line; "h" is the only option letter accepted.
	+ try:
	+ opts, largs = getopt.getopt(args[1:], "h")
	+ except getopt.GetoptError as err:
	+ # The getopt error text goes to stdout (print's default stream).
	+ print(str(err))
	+ usage(args[0])
	+
	+ for o, a in opts:
	+ if o == "-h":
	+ usage(args[0])
	+ else:
	+ # With the option string "h" getopt yields nothing but -h,
	+ # so this branch only guards against future option letters.
	+ assert False, "unhandled option"
	+
	+ # At least one positional argument (the URI) is required.
	+ if len(largs) < 1:
	+ usage(args[0])
	+
	+ link = largs[0]
	+
	+ # Command-line switches handed to Chrome through the options object.
	+ options = chromeoptions()
	+ chromearguments = [
	+ "headless",
	+ "no-sandbox",
	+ "disable-extensions",
	+ "disable-dev-shm-usage",
	+ "start-maximized",
	+ "window-size=1900,1080",
	+ "disable-gpu"
	+ ]
	+ for carg in chromearguments:
	+ options.add_argument(carg)
	+
	+ # NOTE(review): no driver.quit() call is visible in this function;
	+ # consider closing the browser session before returning.
	+ driver = webdriver.Chrome(options=options)
	+ driver.get(link)
	+
	+ # Block for up to 60 seconds until a div carrying a data-last-letter
	+ # attribute is present in the DOM. The returned element (isnews) is
	+ # not used again in the visible code; the call serves as the wait.
	+ isnews = WebDriverWait(driver=driver, timeout=60).until(
	+ EC.presence_of_element_located((By.XPATH,
	+ "//div[@data-last-letter]")
	+ )
	+ )
	+ # NOTE(review): the next statement is cut off in this rendering.
	+ # newslist.find_elements() is called further down, so the hidden
	+ # tail presumably picks a single element out of the result list --
	+ # confirm against the original file.
	+ newslist = driver.find_elements(By.XPATH, "//div[@data-filter-target=\…
	+
	+ # NOTE(review): this statement is cut off as well; the visible part
	+ # selects <meta property="og:title"> elements. What is finally
	+ # assigned to title cannot be determined from here.
	+ title = driver.find_elements(By.XPATH, "//meta[@property=\"og:title\"]…
	+ # The subtitle reuses the title value. globaltags is assigned but
	+ # not read anywhere in the visible code.
	+ description = title
	+ globaltags = ""
	+
	+ # Feed-level header: title, subtitle, id and links all derive from
	+ # the page title and the URI given on the command line.
	+ print("""<?xml version="1.0" encoding="utf-8"?>""")
	+ print("""<feed xmlns="http://www.w3.org/2005/Atom">""")
	+ print("\t<title><![CDATA[%s]]></title>" % (title))
	+ print("\t<subtitle><![CDATA[%s]]></subtitle>" % (description))
	+ print("\t<id>%s</id>" % (link))
	+ print("\t<link href=\"%s\" rel=\"self\" />" % (link))
	+ print("\t<link href=\"%s\" />" % (link))
	+
	+ # The feed's <updated> value is the current time in UTC.
	+ utcnow = datetime.now(pytz.utc)
	+ print("\t<updated>%s</updated>" % (utcnow.isoformat()))
	+
	+ # Each direct <div> child of the news list is treated as one article.
	+ articles = newslist.find_elements(By.XPATH, "./div")
	+ # Keep everything before the third "/" of the URI, e.g.
	+ # "https://host/a/b" -> "https://host"; used to prefix hrefs that
	+ # do not start with "http".
	+ baselink = "/".join(link.split("/", 3)[:-1])
	+ # Articles are emitted in reverse of their order in the document.
	+ for article in articles[::-1]:
	+ # From here on "link" is rebound from the URI string to the
	+ # article's first direct <a> child; baselink was computed above.
	+ link = article.find_elements(By.XPATH, "./a")[0]
	+ plink = link.get_attribute("href")
	+ if not plink.startswith("http"):
	+ plink = "%s/%s" % (baselink, plink)
	+ ptitle = link.get_attribute("data-title")
	+ pcontent = article.text
	+ # NOTE(review): the literal below reads "[email protected]", which
	+ # looks like e-mail obfuscation introduced by the page this diff
	+ # was rendered from -- confirm the intended author string.
	+ pauthor = "[email protected]"
	+
	+ # Normalize datetime.
	+ # The text of the first <time> descendant is parsed as
	+ # day.month.year; an article whose date does not parse is skipped
	+ # without any message.
	+ updateds = article.find_elements(By.XPATH, ".//time")[0].text
	+ try:
	+ dtupdated = datetime.strptime(updateds, "%d.%m.%Y")
	+ except ValueError:
	+ continue
	+
	+ # Only a date is available, so the time is pinned to 12:00:00 UTC.
	+ dtupdated = dtupdated.replace(hour=12, minute=0,\
	+ second=0, tzinfo=pytz.utc)
	+ # A year later than the current one is replaced by the current year.
	+ if dtupdated.year > utcnow.year:
	+ dtupdated = dtupdated.replace(year=utcnow.year)
	+ pupdated = dtupdated
	+
	+ # One Atom <entry> per article; the article link doubles as its id.
	+ print("\t<entry>")
	+ print("\t\t<id>%s</id>" % (plink))
	+ print("\t\t<title><![CDATA[%s]]></title>" % (ptitle))
	+ print("\t\t<link href=\"%s\" />" % (plink))
	+ print("\t\t<author><name>%s</name></author>" % (pauthor))
	+ print("\t\t<updated>%s</updated>" % (pupdated.isoformat()))
	+ print("\t\t<content><![CDATA[%s]]></content>" % (pcontent))
	+ print("\t</entry>")
	+
	+ print("</feed>")
	+
	+ return 0
	+
	+if __name__ == "__main__":
	+ sys.exit(main(sys.argv))
	+