import keywords as kw
import word_iterator as wi
import header as hd
import color_print as cp
class Counter:
    r"""
    Analyzes a TeX-like source file and counts its words.

    Words are obtained from a ``WordIterator`` as ``(word, separator)`` pairs,
    where ``separator`` is the text that followed the word in the input.
    Every counted word is attributed to one of three contexts (regular text,
    header, caption/note); math formulae and floats are counted separately.
    Reading stops at the ``\bye`` keyword or at the end of the input.
    """

    # Sectioning keywords that take an optional [label] and the kind of
    # header record created for each of them.
    _HEADER_KINDS = {
        '\\chap': "chapter",
        '\\sec': "section",
        '\\secc': "subsection",
    }

    def __init__(self, filename, color_print=False, verb_char=None):
        """
        Prepares the counter for one source file.

        :param filename: passed to ``WordIterator``, the source of analyzed words
        :param color_print: when True the input text is echoed with colors
        :param verb_char: optional character delimiting inline verbatim text
        """
        self.word_iter = wi.WordIterator(filename)  # source of words that will be analyzed
        if verb_char is not None:
            self.word_iter.add_separator(verb_char)
        self.verb_char = verb_char  # used for inline verbatim text
        self.color_print = color_print  # input text will be printed (with colors)
        self.printer = cp.Printer()
        self.regular_words_count = 0
        self.header_words_count = 0
        self.caption_words_count = 0
        self.figure_float_count = 0
        self.math_inline_count = 0
        self.math_count = 0
        self.all_headers = []
        # Three possible values of __context:
        #   1) "regular-text" (implicit)
        #   2) "header"
        #   3) "caption"
        self.__context = "regular-text"

    def run(self):
        r"""
        Main method of the class.

        Reads word pairs and processes them until the input is exhausted or
        the ``\bye`` keyword is read; ``\bye`` itself is only printed.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "\\bye":
            self.__process_word(pair)
            pair = self.word_iter.read()
        if pair is not None:
            self.print_keyword(pair)
        self.print_irrelevant_word(("", "\n"))

    def print_result(self):
        """
        Prints the formatted summary of all counters, followed by one line
        per collected header.
        """
        print("Text words summary: " + str(self.regular_words_count))
        print("Header words summary: " + str(self.header_words_count))
        print("Caption and notes words summary: " + str(self.caption_words_count))
        print("Headers summary: " + str(len(self.all_headers)))
        print("Figure/float count: " + str(self.figure_float_count))
        print("Inline math formulae count: " + str(self.math_inline_count))
        print("Math formulae count: " + str(self.math_count))
        print("Subcounts: (header-words-count + text-words-count + caption-words-count)")
        for header in self.all_headers:
            print(header)

    def print_counted_word(self, pair):
        """
        Prints a word that was counted. The color depends on the current
        context: blue for regular text, cyan for headers, green for captions.
        Does nothing unless color printing is enabled.
        """
        if not self.color_print:
            return
        text = pair[0] + pair[1]
        if self.__context == "regular-text":
            self.printer.blue(text)
        elif self.__context == "header":
            self.printer.cyan(text)
        elif self.__context == "caption":
            self.printer.green(text)

    def print_keyword(self, pair):
        """
        Prints a keyword (word and its separator) in red.
        Does nothing unless color printing is enabled.
        """
        if not self.color_print:
            return
        self.printer.red(pair[0] + pair[1])

    def print_irrelevant_word(self, pair):
        """
        Prints a word that was not counted (comments, brackets, ...) in white.
        Does nothing unless color printing is enabled.
        """
        if not self.color_print:
            return
        self.printer.white(pair[0] + pair[1])

    def __process_word(self, pair):
        """
        Decides how to treat one word pair and dispatches to the matching
        handler (comment, group, math, inline verbatim, keyword, plain text).
        """
        word = pair[0]
        if len(word) == 0 or word == "\n":  # empty word or newline
            self.print_irrelevant_word(pair)
        elif word == "%":
            self.print_irrelevant_word(pair)
            self.__skip_commentary()
        elif word == "{":
            self.print_irrelevant_word(pair)
            self.__load_curly_brackets()
        elif word == "$":
            self.print_keyword(pair)
            self.__load_inline_formulae()
        elif word == "$$":
            self.print_keyword(pair)
            self.__load_formulae()
        elif self.verb_char is not None and word == self.verb_char:
            self.print_keyword(pair)
            self.__load_verbatim(self.verb_char)
        elif self.__is_keyword(word):
            self.__process_keyword(pair)
        else:
            self.__process_text_word(pair)

    def __process_keyword(self, pair):
        """
        Treats keywords.

        Splits the word into the keyword and an optional directly attached
        argument, prints both, and runs the action bound to the keyword.
        Unknown keywords are skipped. The order of the checks is significant:
        specially handled keywords take precedence over the generic keyword
        list.
        """
        word, arg = self.__split_keyword(pair[0])
        self.print_keyword((word, ''))
        self.print_irrelevant_word(('' if arg is None else arg, pair[1]))
        if word == '\\tit':
            self.all_headers.append(hd.Header("title"))
            self.__load_header()
        elif word in self._HEADER_KINDS:
            self.all_headers.append(hd.Header(self._HEADER_KINDS[word]))
            self.__skip_brackets("[", "]")
            self.__load_header()
        elif word == '\\begitems':
            self.__load_list()
        elif word == '\\caption':
            self.__skip_brackets("[", "]")
            self.__load_caption()
        elif word in ('\\fnote', '\\fnotetext', '\\mnote'):
            self.__load_footnote()
        elif word in kw.keywords_list:
            if word in kw.floats_keywords:
                self.figure_float_count += 1
            self.__read_arguments(word)
        elif word == '\\begtt':
            self.__skip_commentary()  # the rest of the opening line is not verbatim text
            self.__load_verbatim()
        elif word in ('\\verbchar', '\\activettchar'):  # keywords with same functionality
            self.__set_verb_char(word, arg, pair[1])
        elif word == '\\code':
            self.__load_code_verbatim()
        elif word in kw.logos:
            self.__load_logo(word, arg, pair[1])
        # any other keyword is unknown and skipped

    def __count_word(self, word):
        """
        Adds one word to the counter of the current context and to the
        sub-count of the most recent header (when there is one).
        """
        if self.__context == "regular-text":
            self.regular_words_count += 1
            if self.all_headers:
                self.all_headers[-1].add_text_word()
        elif self.__context == "header":
            self.header_words_count += 1
            self.all_headers[-1].add_header_word(word)
        elif self.__context == "caption":
            self.caption_words_count += 1
            if self.all_headers:
                self.all_headers[-1].add_caption_word()

    def __process_text_word(self, pair):
        """
        Counts a plain word in the current context.

        Empty words and single non-alphanumeric characters are only printed,
        not counted.
        """
        word = pair[0]
        if len(word) == 0 or (len(word) == 1 and not word.isalnum()):
            self.print_irrelevant_word(pair)
            return
        self.print_counted_word(pair)
        self.__count_word(word)

    @staticmethod
    def __is_keyword(word):
        """
        Decides whether word is a keyword: a backslash followed by at least
        two alphabetic characters.
        """
        return (len(word) >= 3 and word[0] == '\\'
                and word[1].isalpha() and word[2].isalpha())

    @staticmethod
    def __split_keyword(word):
        r"""
        Splits word into two parts - the keyword and its attached argument.

        The split happens at the first non-alphabetic character after the
        leading backslash. For example '\verbchar"' is split into the pair
        '\verbchar', '"'. When there is no such character the argument is
        None.
        """
        for index, char in enumerate(word[1:], start=1):
            if not char.isalpha():
                return word[:index], word[index:]
        return word, None

    def __load_header(self):
        r"""
        Loads a header.

        Reads words in the header context until a new line occurs. When the
        new line is directly preceded by ^^J, reading continues until the
        next new line. A ``\bye`` keyword ends the header and is handed back
        to the word iterator so that the caller can stop on it.
        """
        orig_context = self.__context
        self.__context = 'header'
        try:
            skip_new_line = False
            pair = self.word_iter.read()
            while pair is not None and pair[0] != "\\bye":
                if not pair[0]:
                    self.print_irrelevant_word(pair)
                elif pair[0] == "\n":
                    self.print_irrelevant_word(pair)
                    if not skip_new_line:
                        return
                    skip_new_line = False
                elif pair[0] == "^^J":
                    self.print_irrelevant_word(pair)
                    skip_new_line = True
                else:
                    self.__process_word(pair)
                    skip_new_line = False
                pair = self.word_iter.read()
        finally:
            # restore the context on every exit path, including exceptions
            self.__context = orig_context
        if pair is not None:
            # relies on push_back() making the next read() return this pair,
            # as it is already used in __skip_brackets
            self.word_iter.push_back(pair)

    def __load_list(self):
        r"""
        Loads a list - words that are surrounded by '\begitems' and
        '\enditems'. Item marks ('*') are not counted.

        Raises Exception when the input ends before '\enditems'.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "\\enditems":
            if pair[0] == "*":
                self.print_irrelevant_word(pair)
            else:
                self.__process_word(pair)
            pair = self.word_iter.read()
        if pair is None:
            raise Exception("No list ending found - \\enditems")
        self.print_keyword(pair)

    def __load_caption(self):
        r"""
        Loads a caption - words until the end of line, counted in the
        caption context. A ``\bye`` keyword ends the caption and is handed
        back to the word iterator so that the caller can stop on it.
        """
        orig_context = self.__context
        self.__context = 'caption'
        try:
            pair = self.word_iter.read()
            while pair is not None and pair[0] != "\\bye":
                if pair[0] == "\n":
                    self.print_irrelevant_word(pair)
                    return
                self.__process_word(pair)
                pair = self.word_iter.read()
        finally:
            # restore the context on every exit path, including exceptions
            self.__context = orig_context
        if pair is not None:
            # relies on push_back() making the next read() return this pair,
            # as it is already used in __skip_brackets
            self.word_iter.push_back(pair)

    def __load_footnote(self):
        """
        Loads a footnote (block in curly brackets), counted in the caption
        context. Empty and whitespace words before the block are skipped.
        When the next word is not '{' it is pushed back and nothing is loaded.

        Raises Exception when the input ends before any other word is found.
        """
        orig_context = self.__context
        self.__context = 'caption'
        try:
            pair = self.word_iter.read()
            while pair is not None and (len(pair[0]) == 0 or pair[0].isspace()):
                self.print_irrelevant_word(pair)
                pair = self.word_iter.read()
            if pair is None:
                raise Exception("No opening curly bracket found!")
            if pair[0] != "{":
                self.word_iter.push_back(pair)
                return
            self.print_irrelevant_word(pair)
            self.__load_curly_brackets()
        finally:
            # restore the context on every exit path, including exceptions
            self.__context = orig_context

    def __load_formulae(self):
        """
        Loads a math formula ($$ as separator); its words are not counted.

        Raises Exception when the closing $$ is missing.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "$$":
            self.print_irrelevant_word(pair)
            pair = self.word_iter.read()
        if pair is None:
            raise Exception("No end of math formulae found!")
        self.print_keyword(pair)
        self.math_count += 1

    def __load_inline_formulae(self):
        """
        Loads an inline math formula ($ as separator); its words are not
        counted. Only a terminated formula increases the counter.

        Raises Exception when the closing $ is missing.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "$":
            self.print_irrelevant_word(pair)
            pair = self.word_iter.read()
        if pair is None:
            raise Exception("No end of inline math formulae found!")
        self.print_keyword(pair)
        self.math_inline_count += 1

    def __load_verbatim(self, ending="\\endtt", keyword_print=True):
        """
        Reads words until the ending word occurs.
        These words are processed as regular words (not keywords etc...).

        :param ending: word that terminates the verbatim text
        :param keyword_print: print the ending word as a keyword (True) or
            as an irrelevant word (False)

        Raises Exception when the input ends before the ending word.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != ending:
            self.__process_text_word(pair)
            pair = self.word_iter.read()
        if pair is None:
            raise Exception("Verbatim text not terminated!")
        if keyword_print:
            self.print_keyword(pair)
        else:
            self.print_irrelevant_word(pair)

    def __load_code_verbatim(self):
        r"""
        Loads verbatim text introduced by the "\code" keyword: a block in
        curly brackets, optionally preceded by empty or whitespace words.

        Raises Exception when the next word is not '{' or the input ends.
        """
        pair = self.word_iter.read()
        while pair is not None and (len(pair[0]) == 0 or pair[0].isspace()):
            self.print_irrelevant_word(pair)
            pair = self.word_iter.read()
        if pair is None or pair[0] != "{":
            raise Exception("\\Code must be followed be opening curly bracket('{')!")
        self.print_irrelevant_word(pair)
        self.__load_verbatim("}", False)

    def __load_logo(self, logo, arg, sep):
        """
        Counts a logo keyword as one word of the current context, but only
        when it is directly followed by a slash and an empty separator.
        """
        if arg == '/' and len(sep) == 0:
            self.__count_word(logo)

    def __set_verb_char(self, word, arg, sep):
        """
        Loads a new character for inline verbatim text.

        The character is either attached to the keyword (arg) or, when the
        keyword has neither an argument nor a separator, it is the next word.
        In both cases it must be exactly one character long.

        Raises Exception on any other use of the keyword.
        """
        if arg is not None and len(arg) == 1:
            new_char = arg
        elif arg is None and sep == '':
            pair = self.word_iter.read()
            if pair is None or len(pair[0]) != 1:
                raise Exception("Invalid use of " + word)
            self.print_irrelevant_word(pair)
            new_char = pair[0]
        else:
            raise Exception("Invalid use of " + word)
        self.verb_char = new_char
        self.word_iter.add_separator(new_char)

    def __read_arguments(self, word):
        """
        Looks up in the keywords file which arguments the keyword takes and
        skips them - they are not important for our purpose.
        """
        for specifier in kw.keywords_list[word]:
            if specifier == "O":
                pass  # this case is managed by '__process_keyword' method
            elif specifier == "W":
                self.__obligatory_argument()
            elif specifier == "S":
                self.__skip_brackets("[", "]")
            elif specifier == "P":
                self.__skip_brackets("(", ")")
            elif specifier == "C":
                self.__skip_brackets("{", "}")
            # unknown specifier - no argument expected

    def __obligatory_argument(self):
        r"""
        Reads one more word and returns it.

        Raises Exception when the input ends or ``\bye`` is read instead.
        """
        pair = self.word_iter.read()
        if pair is None or pair[0] == "\\bye":
            raise Exception("No obligatory argument found")
        self.print_irrelevant_word(pair)
        return pair

    def __load_curly_brackets(self):
        """
        Loads a block of source code in curly brackets; the words inside are
        processed normally.

        Raises Exception when the closing bracket is missing.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "}":
            self.__process_word(pair)
            pair = self.word_iter.read()
        if pair is None:
            raise Exception("No closing bracket ('}') found.")
        self.print_irrelevant_word(pair)

    def __skip_brackets(self, opening, closing):
        """
        Skips a block of source code in brackets opening-closing, including
        nested brackets of the same kind. When the first word read is not a
        bracket it is pushed back and nothing is skipped.

        Raises Exception when the input ends before the block is closed.
        """
        depth = 0
        pair = self.word_iter.read()
        while True:
            if pair is None:
                raise Exception("No closing bracket ('" + closing + "') found.")
            token = pair[0]
            if token == opening:
                depth += 1
                self.print_irrelevant_word(pair)
            elif token == closing:
                depth -= 1
                self.print_irrelevant_word(pair)
            if depth <= 0:
                # either the block has just been closed, or there was no block
                if token != closing:
                    self.word_iter.push_back(pair)
                break
            if token != opening and token != closing:
                self.print_irrelevant_word(pair)
            pair = self.word_iter.read()

    def __skip_commentary(self):
        """
        Skips the rest of the current line (up to and including the new
        line). Reaching the end of the input ends the comment as well.
        """
        pair = self.word_iter.read()
        while pair is not None and pair[0] != "\n":
            self.print_irrelevant_word(pair)
            pair = self.word_iter.read()
        if pair is not None:
            self.print_irrelevant_word(pair)