#!/usr/bin/env python3

"""
q2n - QEC to NNTP sync

This script syncs QEC logs to NNTP.

* everything configurable via `Config`
* has a throttler so we don't accidentally submit too much at a time
* has a dry-run for submission
* it remembers what has already been submitted so they don't get submitted
 again
* TODO:
   - read from argv or a config file
   - set up a cron job
   - put it on tildegit (once my application issue gets sorted out)
"""

from dataclasses import dataclass
import datetime as dt
import hashlib
import io
import logging
import os
import pickle
import pwd
import random
import re
import subprocess as sp
import time
import typing as t


_LOGGER = logging.getLogger(__name__)

# Readability aliases for annotations; all of these are plain `str` at runtime.
Path = str
User = str
NntpArticleBody = str
LogEntryHash = str


@dataclass
class Config:
   """Runtime knobs for one sync run.

   All paths/hosts are hard-coded defaults for now; see the module TODO
   about reading these from argv or a config file.
   """

   listing_dir: str            # gopher root holding ship directories
   listing_filename: str       # gophermap listing inside listing_dir
   nntp_group: str             # newsgroup articles are posted to
   nntp_server: str            # NNTP host to post through
   max_submission: int         # throttle: max logs per run
   submission_store_dir: Path  # where submitted-log markers live

   @classmethod
   def create(cls):
      """Build the default production configuration."""
      return cls(
         listing_dir="/var/gopher/",
         listing_filename="listing.gophermap",
         nntp_server="localhost",
         # TODO: find more appropriate one
         nntp_group="cosmic.worldbuilding",
         max_submission=5,
         submission_store_dir="/var/tmp/q2n",
      )


@dataclass
class Ship:
   """A ship from the gopher listing."""
   name: str    # directory name under the listing dir
   owner: User  # unix user owning the ship's directory (see Utils.ship_owner)


@dataclass
class LogEntry:
   """One log parsed out of the listing file."""
   ship: Ship      # ship the log belongs to
   author: User    # the parser sets this to the ship's owner
   title: str      # title text from the listing entry
   file_name: str  # log file name inside the ship's directory

class LogIterator(t.Protocol):
   """Callable producing the list of log entries to consider for submission."""
   def __call__(self) -> t.List[LogEntry]: ...


class SubmitCondition(t.Protocol):
   """Predicate: should this log entry be submitted?"""
   def __call__(self, log_entry: LogEntry) -> bool: ...


class LogSubmitter(t.Protocol):
   """Callable that submits a single log entry (side effects only)."""
   def __call__(self, log: LogEntry) -> None: ...


@dataclass
class Utils:
   """Filesystem helpers shared by the listing iterator and the submitter."""

   config: Config

   def ship_owner(self, ship_name: str) -> User:
      """Unix user owning the ship's directory under the listing dir."""
      ship_dir = f"{self.config.listing_dir}/{ship_name}"
      return self._get_path_user(ship_dir)

   def read_log_content(self, log: LogEntry) -> str:
      """Full text of the given log's file."""
      log_path = f"{self.config.listing_dir}/{log.ship.name}/{log.file_name}"
      return self._read_log_entry(log_path)

   @staticmethod
   def _read_log_entry(path: str) -> str:
      # Logs are plain utf-8 text files.
      with open(path, "r", encoding="utf-8") as f:
         return f.read()

   @staticmethod
   def _get_path_user(fp: str) -> User:
      # Map the path's owning uid to a username via the passwd database.
      return pwd.getpwuid(os.stat(fp).st_uid).pw_name

@dataclass
class SubmittedLogsStore:
   """Remembers which logs have already been submitted.

   Each submitted log is pickled to `<store_dir>/<checksum>`; the file
   names alone answer "was this submitted before?".
   """

   store_dir: str

   def __post_init__(self):
      # os.makedirs instead of shelling out `mkdir -p` with shell=True:
      # no subprocess, and no shell-quoting problems if store_dir ever
      # contains spaces or metacharacters.
      os.makedirs(self.store_dir, exist_ok=True)

   def record_submission(self, log: LogEntry):
      """Persist `log` under its checksum so later runs will skip it."""
      with open(f"{self.store_dir}/{self.checksum(log)}", "wb") as f:
         pickle.dump(log, f)

   def load_submitted_logs(self) -> t.List[LogEntryHash]:
      """Checksums of every log submitted so far."""
      return os.listdir(self.store_dir)

   @staticmethod
   def checksum(log: LogEntry) -> LogEntryHash:
      # Ship name + file name identify an entry (titles can repeat).
      # md5 is fine here: this is a dedup key, not a security boundary.
      checked_str = f"{log.ship.name}{log.file_name}"
      return hashlib.md5(checked_str.encode("utf-8")).hexdigest()


# Throttles the list of log entries to submit — just in case there's a bug.
# Usually we'd limit logs to submit to a small number, and maybe also
# send out some alert.
SubmissionThrottle = t.Callable[[t.List[LogEntry]], t.List[LogEntry]]


@dataclass
class ListingFileLogIterator(LogIterator):
   """Reads the gophermap listing file and parses every entry in it."""

   listing_dir: str
   listing_filename: str
   utils: Utils

   # Entry format: 0<ship> - <title><TAB><file_path>.
   # Compiled once at class level instead of re-importing `re` per entry.
   _ENTRY_RE = re.compile(r"^0(.+?) - (.+)\t(.+)$")

   def __call__(self) -> t.List[LogEntry]:
      listing_path = f"{self.listing_dir}/{self.listing_filename}"
      with open(listing_path, "r", encoding="utf-8") as f:
         return [self._parse(line) for line in f]

   def _parse(self, entry: str) -> LogEntry:
      """Parse a listing file entry into a `LogEntry`

      An entry looks like this:
          0betsy - About QEC  /betsy/qec.txt

      I.e.
          0<ship> - <title><TAB><file_path>

      Note:
      * <file_path> is rooted at /var/gopher, i.e., where the listing
        file resides.

      Raises:
          ValueError: if the entry does not match the expected format.
      """
      res = self._ENTRY_RE.match(entry)
      if not res:
         raise ValueError(f"Cannot parse: {entry}")

      # It's more robust to use the file path (/ship/fn.txt) to obtain ship's
      # name, rather than res.group(1). This is b/c there're duplicated
      # entries in the listing:
      #   0Polonia - 24131    /Polonia-II/24131.txt
      #   0Polonia-II - 24131 /Polonia-II/24131.txt
      title = res.group(2)
      log_path = res.group(3)
      ship, log_fn = self._parse_log_file_name(log_path)
      ship_owner = self.utils.ship_owner(ship)

      return LogEntry(
         ship=Ship(name=ship, owner=ship_owner),
         author=ship_owner,
         title=title,
         file_name=log_fn,
      )

   @staticmethod
   def _parse_log_file_name(ship_and_file: str) -> t.Tuple[str, str]:
      """/<ship>/file.txt -> (<ship>, file.txt)

      Validates the component count explicitly instead of `t.cast`-ing a
      possibly wrong-length tuple (which used to surface as a confusing
      unpack error at the call site).
      """
      parts = [x for x in ship_and_file.split("/") if x]
      if len(parts) != 2:
         raise ValueError(f"Expected /<ship>/<file>, got: {ship_and_file}")
      return parts[0], parts[1]


@dataclass
class SubmitConditionImpl(SubmitCondition):
   """True iff the log's checksum is not yet recorded in the store."""

   submission_store: SubmittedLogsStore

   def __call__(self, log_entry: LogEntry) -> bool:
      store = self.submission_store
      already_submitted = store.load_submitted_logs()
      return store.checksum(log_entry) not in already_submitted


@dataclass
class NntpLogSubmitter(LogSubmitter):
   """Posts a log entry to an NNTP group and records the submission."""

   @dataclass
   class NntpLogFormat:
      # Shape of an outgoing article. Currently informational only —
      # nothing in this class instantiates it yet.
      subject: str
      body: str
      from_: str

   submission_store: SubmittedLogsStore
   read_log_entry: t.Callable[[LogEntry], NntpArticleBody]
   nntp_group: str
   nntp_server: str
   dry_run: bool = False  # when True, log the article but never post it

   def __call__(self, log: LogEntry) -> None:
      # Record only after nntp_submit returns, so a failed post is
      # retried on the next run instead of being marked as done.
      self.nntp_submit(log)
      self.submission_store.record_submission(log)

   def add_envelope(self, article: str, log: LogEntry) -> str:
      """Wrap the raw article body in the in-fiction QEC envelope."""
      return f"""\
TIMESTAMP: {int(time.time())} SGT
AUTHOR: {log.author}
ORIGINATING SHIP: {log.ship.name}
QEC GATEWAY: QG-{random.randint(0, 31)}

{article}
"""

   def nntp_submit(self, log: LogEntry) -> None:
      """Format `log` as an article and post it (unless dry_run)."""
      article_body = self.read_log_entry(log)
      article_body = self.add_envelope(article_body, log)

      msg = f"""\
Newsgroups: {self.nntp_group}
Subject: [QEC] {log.title}
From: {log.author} "{log.author}@cosmic.voyage"

{article_body}
"""
      _LOGGER.info(f"About to submit log:\n{msg}")
      if self.dry_run:
         # Bail out before touching the network: the old code opened the
         # NNTP connection unconditionally, so dry runs still required a
         # live server.
         return

      # NOTE(review): nntplib is deprecated and removed in Python 3.13+;
      # worth migrating eventually.
      import nntplib as nn
      # Context manager closes the connection (it used to be leaked).
      # A fresh BytesIO is already positioned at 0; no seek needed.
      with nn.NNTP(self.nntp_server, readermode=True) as s:
         s.post(io.BytesIO(msg.encode("utf-8")))


@dataclass
class SubmissionThrottler:
   """Caps a submission batch at `max_submission` entries."""

   max_submission: int

   def __call__(self, logs: t.List[LogEntry]) -> t.List[LogEntry]:
      # Keep only the first few entries; order is preserved.
      return logs[:self.max_submission]


def main():
   """Wire the components together and submit any not-yet-submitted logs."""
   logging.basicConfig()
   logging.root.setLevel(logging.INFO)

   config = Config.create()
   _LOGGER.info(f"Running with config: {config}")

   utils = Utils(config=config)

   iterate_logs = ListingFileLogIterator(
       listing_dir=config.listing_dir,
       listing_filename=config.listing_filename,
       utils=utils,
   )
   throttler = SubmissionThrottler(config.max_submission)
   submission_store = SubmittedLogsStore(store_dir=config.submission_store_dir)
   should_submit = SubmitConditionImpl(submission_store=submission_store)
   submit_log = NntpLogSubmitter(
       submission_store=submission_store,
       read_log_entry=utils.read_log_content,
       nntp_group=config.nntp_group,
       nntp_server=config.nntp_server,
       dry_run=True,  # TODO remove once verified end-to-end
   )

   # Skip already-submitted logs, then throttle the batch size.
   logs_to_submit = [log for log in iterate_logs() if should_submit(log)]
   logs_to_submit = throttler(logs_to_submit)
   _LOGGER.info(f"Submitting {len(logs_to_submit)} logs...")
   for log in logs_to_submit:
      submit_log(log)


if __name__ == "__main__":
   main()