Added work from my other class repositories before deletion

This commit is contained in:
2017-11-29 10:28:24 -08:00
parent cb0b5f4d25
commit 5ea24c81b5
198 changed files with 739603 additions and 0 deletions

View File

@@ -0,0 +1,84 @@
# eventkwic.txt by Corwin Perren
###########################################
########## Specification Writeup ##########
###########################################
My specification for the Kwic class is as follows. Once Kwic is constructed, it waits for a method call. If no text has
been added to the implementation and either index or listPairs is called, they will return empty arrays. Once text is
added using addText, that text is appended to a class variable as one long string. It is done this way to make sure that
lines that may be broken up into multiple addText calls still work properly. Now that there is text to process, a call
to index will process the whole string that's been stored and return the proper kwic index. If listPairs is called
instead, it will internally run index, but not return the indexed data, so that the new text is processed. It then
creates the pairs and returns the array. Index works semi-incrementally. Rather than keeping track of every line of text
ever added to the class, index processes the new data into the array of tuples (again, just for the new input), appends
it to the class's global array, and then re-sorts it. In this way, we avoid having to recompute the sentences that have
already been processed, apart from having to re-alphabetize. I chose not to re-compute the sentences each time there was a
new call to addText as that seemed like a waste of CPU cycles. It's basically a trade-off between having a call to index
be fast and having the call to addText be fast. One potential option I considered was to use multi-threading to
continually process the input text for both indexing and listing pairs. While this would have greatly increased the
complexity (especially where eventspec is concerned), it also would have resulted in a faster implementation.
I used the eventspec class and the kwic.fsm state machine to verify that all of the above processes happen correctly,
including handling the extra constructor fields for periodsToBreaks and ignoreWords when they are used. By printing out
the steps taken through processing, it is very easy to see the program take the correct steps. In the case that a
catastrophic state logic error has occurred, the EventSpec class will stop execution with a trace statement, making
it very easy to diagnose what incorrect step was taken and correct it. I decided not to place event calls where there
was the potential for large loops to keep the log of steps taken clear and concise. Had I placed further logic to handle
these loops, there could potentially be hundreds, thousands, or more logged steps that would make debugging difficult.
In a sense, using EventSpec performs a similar function to unit testing: if you ever change your code in a way that
causes it to skip a step, or produce fatally incorrect output, the class will let you know roughly where the changed
code is, and therefore give you a good idea about what went wrong.
#####################################
########## Trace Output #1 ##########
#####################################
# Code run
kc = Kwic(periodsToBreaks=True)
kc.addText("This pair? is good.\n So is this pair and that pair")
kc.index()
kc.reset()
kc.addText("This pair? is good.\n So is this pair and that pair")
kc.listPairs()
kc.print_eventspec_log()
# Trace Output
STEP #0: callConstructor --> idle
STEP #1: callAddText --> idle
STEP #2: callIndex --> processIndex
STEP #3: callProcessIndex --> checkIfText
STEP #4: callSplitPeriods --> splitIntoTuples
STEP #5: callSplitAsTuples --> fillCircular
STEP #6: callFillCircular --> checkIgnoreOrAlpha
STEP #7: callAlphabetize --> idle
STEP #8: callReset --> idle
STEP #9: callAddText --> idle
STEP #10: callListPairs --> processListPairs
STEP #11: callProcessListPairs --> listPairsIndexOrList
STEP #12: callProcessIndex --> checkIfText
STEP #13: callSplitPeriods --> splitIntoTuples
STEP #14: callSplitAsTuples --> fillCircular
STEP #15: callFillCircular --> checkIgnoreOrAlpha
STEP #16: callAlphabetize --> idle
STEP #17: callCreateListPairs --> idle
#####################################
########## Trace Output #2 ##########
#####################################
# Code run
kc = Kwic(ignoreWords=["and", "So"])
kc.addText("This pair? is good.\n So is this pair and that pair")
kc.listPairs()
kc.print_eventspec_log()
# Trace Output
STEP #0: callConstructor --> idle
STEP #1: callAddText --> idle
STEP #2: callListPairs --> processListPairs
STEP #3: callProcessListPairs --> listPairsIndexOrList
STEP #4: callProcessIndex --> checkIfText
STEP #5: processNewlineSplit --> splitIntoTuples
STEP #6: callSplitAsTuples --> fillCircular
STEP #7: callFillCircular --> checkIgnoreOrAlpha
STEP #8: callRemoveWords --> removingWords
STEP #9: callAlphabetize --> idle
STEP #10: callCreateListPairs --> idle

View File

@@ -0,0 +1,66 @@
class EventSpec():
    """Run-time checker that follows a finite state machine read from a file.

    Each call to ``event`` must name a transition that is legal from the
    current state; otherwise a ``RuntimeError`` carrying the trace so far is
    raised.

    Instance attributes set by ``__init__``:
      start    -- name of the begin state
      machine  -- dict mapping state name -> {event name: target state}
      state    -- name of the current state
      trace    -- list of (event, resulting state) tuples, in firing order
      triggers -- dict mapping event name -> zero-argument callback
    """

    def readfsm(self, file):
        """Parse the FSM definition in *file* and return ``(begin_state, fsm)``.

        Recognised lines (tokens are whitespace separated):
          ``begin: NAME``      -- names the begin state
          ``state: NAME``      -- starts the definition of a state
          ``EVENT -> TARGET``  -- transition belonging to the current state
        Blank lines and lines whose first token starts with ``#`` are skipped.

        Raises SyntaxError for an unrecognised or truncated line, for a state
        defined twice, and when no ``begin:`` line is present.
        """
        fsm = {}
        begin_state = None
        current_state = None
        transitions = {}
        with open(file) as f:
            for line in f:
                tokens = line.split()
                if not tokens or tokens[0].startswith("#"):
                    continue
                if tokens[0] == "state:" and len(tokens) >= 2:
                    # A new state header closes the previous state, if any.
                    if current_state is not None:
                        if current_state in fsm:
                            raise SyntaxError("Cannot define state " + current_state + " twice")
                        fsm[current_state] = transitions
                    current_state = tokens[1]
                    transitions = {}
                elif len(tokens) >= 3 and tokens[1] == "->":
                    transitions[tokens[0]] = tokens[2]
                elif tokens[0] == "begin:" and len(tokens) >= 2:
                    begin_state = tokens[1]
                else:
                    # Also reached for truncated lines such as "state:" or
                    # "a ->", which previously escaped as IndexError.
                    raise SyntaxError(line + " is not a line in a finite state machine definition")
        if current_state is not None:
            # The last state gets the same duplicate check as every other one.
            if current_state in fsm:
                raise SyntaxError("Cannot define state " + current_state + " twice")
            fsm[current_state] = transitions
        if begin_state is None:
            raise SyntaxError("No begin state in finite state machine definition")
        return (begin_state, fsm)

    def __init__(self, file):
        """Load the machine from *file* and position it at the begin state."""
        (self.start, self.machine) = self.readfsm(file)
        self.state = self.start
        self.trace = []
        self.triggers = {}

    def onEvent(self, event, action):
        """Register *action* (called with no arguments) to run whenever *event* fires."""
        self.triggers[event] = action

    def reset(self):
        """Return to the begin state and clear the trace; triggers are kept."""
        self.state = self.start
        self.trace = []

    # NOTE(review): __init__ and reset() assign instance attributes named
    # ``trace`` and ``state``, which shadow the two accessor methods below on
    # every instance, so ``es.trace()`` / ``es.state()`` are not callable.
    # Read ``es.trace`` / ``es.state`` directly. The methods are left in place
    # so the class-level interface is unchanged.
    def trace(self):
        return self.trace

    def state(self):
        return self.state

    def printLog(self):
        """Print one numbered line per recorded (event, resulting state) step."""
        for i, (e, s) in enumerate(self.trace):
            # A single pre-joined string prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(" STEP #" + str(i) + ": " + str(e) + " --> " + str(s))

    def event(self, event):
        """Fire *event*: advance the state, record the step, run any trigger.

        Raises RuntimeError (state and trace left untouched) when *event* is
        not a legal transition from the current state.
        """
        # Only the transition lookup is guarded, so a KeyError raised inside a
        # trigger callback is not misreported as an illegal transition.
        try:
            next_state = self.machine[self.state][event]
        except KeyError:
            raise RuntimeError("From state " + self.state + ", transition " + event + " is not allowed (trace: " + str(self.trace) + ")")
        self.state = next_state
        self.trace.append((event, next_state))
        if event in self.triggers:
            self.triggers[event]()

View File

@@ -0,0 +1,39 @@
begin: waitForStart
state: waitForStart
callConstructor -> idle
state: idle
callAddText -> idle
callIndex -> processIndex
callListPairs -> processListPairs
callReset -> idle
callCreateListPairs -> idle
state: processIndex
callProcessIndex -> checkIfText
state: checkIfText
callSplitPeriods -> splitIntoTuples
processNewlineSplit -> splitIntoTuples
processNoNewText -> idle
state: splitIntoTuples
callSplitAsTuples -> fillCircular
state: fillCircular
callFillCircular -> checkIgnoreOrAlpha
state: checkIgnoreOrAlpha
callAlphabetize -> idle
callRemoveWords -> removingWords
state: removingWords
callAlphabetize -> idle
state: processListPairs
callProcessListPairs -> listPairsIndexOrList
state: listPairsIndexOrList
callProcessIndex -> checkIfText
callCreateListPairs -> idle

View File

@@ -0,0 +1,270 @@
# Created by Corwin Perren
######################################################
########## Begin Kwic Class Implementation ###########
######################################################
class Kwic(object):
    """Keyword-in-context indexer that reports each processing step to EventSpec.

    Text is queued with ``addText`` and only processed when ``index`` or
    ``listPairs`` is called. ``index`` returns every circular shift of every
    sentence's words, sorted case-insensitively; ``listPairs`` returns the
    word pairs that occur together in more than one sentence.
    """

    def __init__(self, ignoreWords=None, periodsToBreaks=False):
        """Create an empty index.

        ignoreWords     -- optional list of words; shifts that begin with one
                           of them (case-insensitive, after stripping
                           ".:!?," from both ends) are left out of the index.
        periodsToBreaks -- when true, split sentences at periods (see
                           ``__split_by_periods``) instead of at newlines.

        Opens "kwic.fsm" from the current working directory via EventSpec.
        """
        self.ignore_words = ignoreWords
        self.periods_to_breaks = periodsToBreaks
        # (word list, sentence number) for every sentence processed so far.
        self.all_sentence_tuples = []
        # Sorted (shifted word list, sentence number) tuples.
        self.kwic_output_array = []
        # ((word, word), count) tuples produced by the last listPairs call.
        self.list_pairs_output_array = []
        # Text queued by addText and not yet processed.
        self.text_to_add = ""
        self.event_spec_instance = EventSpec("kwic.fsm")
        self.event_spec_instance.event("callConstructor")

    def addText(self, new_text):
        """Queue *new_text* (converted with ``str``) for the next processing pass.

        Pieces are joined with a single space, so a line split across two
        calls is still treated as one line.
        """
        self.event_spec_instance.event("callAddText")
        self.text_to_add += " " + str(new_text)

    def index(self):
        """Process any queued text and return the sorted KWIC index list."""
        self.event_spec_instance.event("callIndex")
        self.__process_kwic_index()
        return self.kwic_output_array

    def listPairs(self):
        """Process any queued text and return ``[((word, word), count), ...]``.

        Only pairs seen in more than one sentence are included.
        """
        self.event_spec_instance.event("callListPairs")
        self.__process_list_pairs()
        return self.list_pairs_output_array

    def reset(self):
        """Discard all queued text and all previously computed results."""
        self.event_spec_instance.event("callReset")
        self.all_sentence_tuples = []
        self.kwic_output_array = []
        self.list_pairs_output_array = []
        # Must be a string: addText appends with += and the newline path
        # calls .split on it. The previous [] made += extend a list with
        # single characters and broke .split after a reset.
        self.text_to_add = ""

    def print_eventspec_log(self):
        """Print the EventSpec step log for this instance."""
        self.event_spec_instance.printLog()

    def __process_kwic_index(self):
        """Fold the queued text into the index; only the new text is shifted."""
        self.event_spec_instance.event("callProcessIndex")
        if not self.text_to_add:
            self.event_spec_instance.event("processNoNewText")
            return
        if self.periods_to_breaks:
            sentences = self.__split_by_periods(self.text_to_add)
        else:
            self.event_spec_instance.event("processNewlineSplit")
            sentences = self.text_to_add.split('\n')
        sentence_tuples = self.__split_by_word_as_tuples(sentences)
        self.all_sentence_tuples.extend(sentence_tuples)
        shifted = self.__fill_with_circular_shifts_and_original(sentence_tuples)
        if self.ignore_words:
            shifted = self.__remove_words(shifted, self.ignore_words)
        self.kwic_output_array.extend(shifted)
        self.text_to_add = ""
        # Earlier entries are kept as they are; only the ordering is redone.
        self.kwic_output_array = self.__alphabetize_tuple_list(self.kwic_output_array)

    def __process_list_pairs(self):
        """Index queued text if there is any, then rebuild the pair list."""
        self.event_spec_instance.event("callProcessListPairs")
        if self.text_to_add:
            self.__process_kwic_index()
        self.list_pairs_output_array = self.__create_list_pairs(self.all_sentence_tuples)

    def __fill_with_circular_shifts_and_original(self, sentence_array):
        """Return every rotation of every sentence, each tagged with its sentence number."""
        self.event_spec_instance.event("callFillCircular")
        return [(self.__array_circular_shift(words, offset), sentence_number)
                for words, sentence_number in sentence_array
                for offset in range(len(words))]

    def __alphabetize_tuple_list(self, input_array):
        """Return *input_array* sorted case-insensitively by each entry's word list."""
        self.event_spec_instance.event("callAlphabetize")
        return sorted(input_array, key=self.__alphabetized_key)

    def __create_list_pairs(self, input_array):
        """Count, per pair of distinct sanitized words, the sentences containing both.

        Returns the pairs counted more than once as ``((first, second), count)``
        with ``first < second``, sorted by pair.
        """
        self.event_spec_instance.event("callCreateListPairs")
        pair_counts = {}
        for words, _ in input_array:
            # A set makes each pair count at most once per sentence.
            cleaned = set(self.__sanitize_word(word) for word in words)
            cleaned.discard("")  # words that were punctuation only
            ordered = sorted(cleaned)
            for position, first in enumerate(ordered):
                for second in ordered[position + 1:]:
                    pair_counts[(first, second)] = pair_counts.get((first, second), 0) + 1
        output_list = [(pair, count) for pair, count in pair_counts.items() if count > 1]
        output_list.sort(key=self.__alphabetized_key)
        return output_list

    def __split_by_periods(self, document):
        """Split *document* into sentences at periods; newlines become spaces.

        A period ends a sentence (and is kept at the end of it) when it is the
        first or last character of *document*, or when it follows a lowercase
        letter and is followed by whitespace.
        """
        self.event_spec_instance.event("callSplitPeriods")
        sentences = []
        pieces = []
        last_index = len(document) - 1
        for position, char in enumerate(document):
            if char != '.':
                pieces.append(" " if char == '\n' else char)
                continue
            at_edge = position == 0 or position == last_index
            if at_edge or (document[position - 1].islower() and document[position + 1].isspace()):
                pieces.append(char)
                sentences.append("".join(pieces))
                pieces = []
            # NOTE(review): a period that does not end a sentence is dropped
            # from the text (e.g. "3.5" becomes "35") -- confirm this is intended.
        if pieces:
            sentences.append("".join(pieces))
        return sentences

    def __split_by_word_as_tuples(self, sentence_array):
        """Return ``(non-empty words, position)`` for each sentence in *sentence_array*.

        NOTE(review): positions start at 0 on every call, so sentences from
        separately processed batches of text reuse the same numbers -- confirm
        whether numbering should continue across batches.
        """
        self.event_spec_instance.event("callSplitAsTuples")
        # A list comprehension (not filter) keeps the words sliceable on Python 3.
        return [([word for word in sentence.split(" ") if word], position)
                for position, sentence in enumerate(sentence_array)]

    def __array_circular_shift(self, input_array, rotate_val):
        """Return *input_array* rotated left by *rotate_val* positions."""
        return input_array[rotate_val:] + input_array[:rotate_val]

    def __alphabetized_key(self, input_data):
        """Sort key: the lowercased words of the entry's first element."""
        return [word.lower() for word in input_data[0]]

    def __remove_words(self, input_array, words):
        """Return the shifts whose first word is not one of *words*.

        The comparison is case-insensitive and ignores ".:!?," at either end
        of the shift's first word.
        """
        self.event_spec_instance.event("callRemoveWords")
        ignored = set(word.lower() for word in words)
        return [current_tuple for current_tuple in input_array
                if current_tuple[0][0].lower().strip(".:!?,") not in ignored]

    def __sanitize_word(self, input_word):
        """Return *input_word* lowercased with every ".,?!:" character removed."""
        # str.translate(None, chars) exists only for Python 2 byte strings;
        # filtering characters gives the same result on Python 2 and 3.
        return "".join(char for char in input_word.lower() if char not in ".,?!:")

    def __return_ordered_words(self, word_one, word_two):
        """Return the two words as a tuple with the smaller one first."""
        if word_one < word_two:
            return word_one, word_two
        return word_two, word_one
######################################################
########## Begin Provided EventSpec Include ##########
######################################################
class EventSpec():
    """Run-time checker that follows a finite state machine read from a file.

    Each call to ``event`` must name a transition that is legal from the
    current state; otherwise a ``RuntimeError`` carrying the trace so far is
    raised.

    Instance attributes set by ``__init__``:
      start    -- name of the begin state
      machine  -- dict mapping state name -> {event name: target state}
      state    -- name of the current state
      trace    -- list of (event, resulting state) tuples, in firing order
      triggers -- dict mapping event name -> zero-argument callback
    """

    def readfsm(self, file):
        """Parse the FSM definition in *file* and return ``(begin_state, fsm)``.

        Recognised lines (tokens are whitespace separated):
          ``begin: NAME``      -- names the begin state
          ``state: NAME``      -- starts the definition of a state
          ``EVENT -> TARGET``  -- transition belonging to the current state
        Blank lines and lines whose first token starts with ``#`` are skipped.

        Raises SyntaxError for an unrecognised or truncated line, for a state
        defined twice, and when no ``begin:`` line is present.
        """
        fsm = {}
        begin_state = None
        current_state = None
        transitions = {}
        with open(file) as f:
            for line in f:
                tokens = line.split()
                if not tokens or tokens[0].startswith("#"):
                    continue
                if tokens[0] == "state:" and len(tokens) >= 2:
                    # A new state header closes the previous state, if any.
                    if current_state is not None:
                        if current_state in fsm:
                            raise SyntaxError("Cannot define state " + current_state + " twice")
                        fsm[current_state] = transitions
                    current_state = tokens[1]
                    transitions = {}
                elif len(tokens) >= 3 and tokens[1] == "->":
                    transitions[tokens[0]] = tokens[2]
                elif tokens[0] == "begin:" and len(tokens) >= 2:
                    begin_state = tokens[1]
                else:
                    # Also reached for truncated lines such as "state:" or
                    # "a ->", which previously escaped as IndexError.
                    raise SyntaxError(line + " is not a line in a finite state machine definition")
        if current_state is not None:
            # The last state gets the same duplicate check as every other one.
            if current_state in fsm:
                raise SyntaxError("Cannot define state " + current_state + " twice")
            fsm[current_state] = transitions
        if begin_state is None:
            raise SyntaxError("No begin state in finite state machine definition")
        return (begin_state, fsm)

    def __init__(self, file):
        """Load the machine from *file* and position it at the begin state."""
        (self.start, self.machine) = self.readfsm(file)
        self.state = self.start
        self.trace = []
        self.triggers = {}

    def onEvent(self, event, action):
        """Register *action* (called with no arguments) to run whenever *event* fires."""
        self.triggers[event] = action

    def reset(self):
        """Return to the begin state and clear the trace; triggers are kept."""
        self.state = self.start
        self.trace = []

    # NOTE(review): __init__ and reset() assign instance attributes named
    # ``trace`` and ``state``, which shadow the two accessor methods below on
    # every instance, so ``es.trace()`` / ``es.state()`` are not callable.
    # Read ``es.trace`` / ``es.state`` directly. The methods are left in place
    # so the class-level interface is unchanged.
    def trace(self):
        return self.trace

    def state(self):
        return self.state

    def printLog(self):
        """Print one numbered line per recorded (event, resulting state) step."""
        for i, (e, s) in enumerate(self.trace):
            # A single pre-joined string prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(" STEP #" + str(i) + ": " + str(e) + " --> " + str(s))

    def event(self, event):
        """Fire *event*: advance the state, record the step, run any trigger.

        Raises RuntimeError (state and trace left untouched) when *event* is
        not a legal transition from the current state.
        """
        # Only the transition lookup is guarded, so a KeyError raised inside a
        # trigger callback is not misreported as an illegal transition.
        try:
            next_state = self.machine[self.state][event]
        except KeyError:
            raise RuntimeError("From state " + self.state + ", transition " + event + " is not allowed (trace: " + str(
                self.trace) + ")")
        self.state = next_state
        self.trace.append((event, next_state))
        if event in self.triggers:
            self.triggers[event]()

View File

@@ -0,0 +1,13 @@
begin: start
state: start
a -> infinibs
b -> infinias
state: infinibs
b -> infinibs
a -> start
state: infinias
a -> infinias
b -> start

View File

@@ -0,0 +1,12 @@
# For testing implementation
from kwic import Kwic
# Expected index() result for the two-line sample text used below: every
# circular shift of each line's words, sorted case-insensitively, paired with
# the line's position (0 or 1).
output_check = [
    (['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),
    (['good.', 'This', 'pair?', 'is'], 0),
    (['is', 'good.', 'This', 'pair?'], 0),
    (['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),
    (['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),
    (['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),
    (['pair?', 'is', 'good.', 'This'], 0),
    (['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),
    (['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),
    (['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),
    (['This', 'pair?', 'is', 'good.'], 0),
]
# Expected listPairs() result: word pairs that appear together in both lines.
output_check_pairs = [
    (('is', 'pair'), 2),
    (('is', 'this'), 2),
    (('pair', 'this'), 2),
]
if __name__ == "__main__":
    kc = Kwic()
    kc.addText("This pair? is good.\n So is this pair and that pair")
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(kc.listPairs() == output_check_pairs)
    print(kc.index() == output_check)
    kc.print_eventspec_log()

View File

@@ -0,0 +1,29 @@
from assign3 import eventspec
# Manual driver for EventSpec: fire a sequence of "a"/"b" events, print the
# log, then attach callbacks to both events and repeat.
# NOTE(review): this loads "kwic.fsm", but the events fired below are "a" and
# "b". The kwic.fsm added in this same commit only allows callConstructor from
# its begin state, so the first event would raise RuntimeError with that file
# -- confirm which FSM definition this script is meant to load.
es = eventspec.EventSpec("kwic.fsm")
for event_name in ("a", "b", "b", "b", "b", "a"):
    es.event(event_name)
es.printLog()


# Beatles code
def printHello():
    """Callback run each time event "a" fires."""
    print("Hello")


def printGoodbye():
    """Callback run each time event "b" fires."""
    print("Goodbye")


es.onEvent("a", printHello)
es.onEvent("b", printGoodbye)
for event_name in ("a", "a", "a", "b", "b", "a"):
    es.event(event_name)
es.printLog()