* everything configurable via `Config`
* has a throttler so we don't accidentally submit too much at a time
* has a dry-run for submission
* it remembers which logs have already been submitted so they don't get
submitted again
* TODO:
- read from argv or a config file
- set up a cron job
- put it on tildegit (once my application issue gets sorted out)
"""
from dataclasses import dataclass
import datetime as dt
import io
import logging
import os
import pickle
import pwd
import random
import subprocess as sp
import time
import typing as t
# Type of a submission throttler: a callable that receives the list of log
# entries about to be submitted and returns the list to actually submit.
# It is a safety net, just in case there's a bug: usually we'd limit the
# logs to submit to a small number, and maybe also send out some alert.
SubmissionThrottle = t.Callable[[t.List[LogEntry]], t.List[LogEntry]]
@dataclass
class ListingFileLogIterator(LogIterator):
listing_dir: str
listing_filename: str
utils: Utils
def __call__(self) -> t.List[LogEntry]:
    """Read the listing file and parse every line into a `LogEntry`.

    The file at `<listing_dir>/<listing_filename>` is read in full as
    UTF-8 and closed before any parsing happens.

    Note:
    * every line of the file is handed to `_parse`, in file order, so a
      line that `_parse` rejects (it raises `ValueError`) aborts the
      whole call.
    """
    listing_path = f"{self.listing_dir}/{self.listing_filename}"
    with open(listing_path, mode="r", encoding="utf-8") as listing:
        # Materialise all lines first so parsing runs on a closed file.
        raw_lines = list(listing)

    return list(map(self._parse, raw_lines))
def _parse(self, entry: str) -> LogEntry:
"""Parse a listing file entry into a `LogEntry`
An entry looks like this:
0betsy - About QEC /betsy/qec.txt
I.e.
0<ship> - <title><TAB><file_path>
Note:
* <file_path> is rooted at /var/gopher, i.e., where the listing
file resides.
"""
import re
res = re.match(r"^0(.+?) - (.+)\t(.+)$", entry)
if not res: raise ValueError(f"Cannot parse: {entry}")
# It's more robust to derive the ship's name from the file path
# (/ship/fn.txt) than from res.group(1), because the listing contains
# duplicated entries such as:
# 0Polonia - 24131 /Polonia-II/24131.txt
# 0Polonia-II - 24131 /Polonia-II/24131.txt
title = res.group(2)
log_path = res.group(3)
ship, log_fn = self._parse_log_file_name(log_path)
ship_owner = self.utils.ship_owner(ship)
logs_to_submit = [log for log in iterate_logs() if should_submit(log)]
### # FOR TEST: remove - randomly choose one log
### logs_to_submit = logs_to_submit[random.randint(0, len(logs_to_submit)-2):][0:]
logs_to_submit = throttler(logs_to_submit)
_LOGGER.info(f"Submitting {len(logs_to_submit)} logs...")
for log in logs_to_submit: submit_log(log)