Added work from my other class repositories before deletion

2025-12-31 04:14:17 +00:00 · 2017-11-29 10:28:24 -08:00
parent cb0b5f4d25
commit 5ea24c81b5
198 changed files with 739603 additions and 0 deletions
--- a/Coursework/CS
+++ b/Coursework/CS
--- a/3/eventkwic.txt
+++ b/3/eventkwic.txt
@@ -0,0 +1,84 @@
+# eventkwic.txt by Corwin Perren
+
+###########################################
+########## Specification Writeup ##########
+###########################################
+My specification for the Kwic class is as follows. Once Kwic is constructed, it waits for a method call. If no text has
+been added to the implementation and either index or listPairs is called, they will return empty arrays. Once text is
+added using addText, that text is appended to a class variable as one long string. It is done this way to make sure that
+lines that may be broken up into multiple addText calls still work properly. Now that there is text to process, a call
+to index will process the whole string that's been stored and return the proper kwic index. If listPairs is called
+instead, it will internally run index, but not return the indexed data, so that the new text is processed. It then
+creates the pairs and returns the array. Index works semi-incrementally. Rather than keeping track of every line of text
+ever added to the class, index processes the new data into the array of tuples (again, just for the new input), appends
+it to the class's global array, and then re-sorts it. In this way, we avoid having to recompute the sentences that have
+already been processed, apart from having to re-alphabetize. I chose not to recompute the sentences each time there was
+a new call to addText, as that seemed like a waste of CPU cycles. It's basically a trade-off between having a call to
+index be fast and having the call to addText be fast. One potential option I considered was to use multi-threading to
+continually process the input text for both indexing and listing pairs. While this would have greatly increased the
+complexity (especially where eventspec is concerned), it also would have resulted in a faster implementation.
+
+I used the eventspec class and the kwic.fsm state machine to verify that all of the above processes happen correctly,
+including handling the extra constructor fields for periodsToBreaks and ignoreWords when they are used. By printing out
+the steps taken through processing, it is very easy to see the program take the correct steps. In the case that a
+catastrophic state logic error has occurred, the EventSpec class will stop execution with a trace statement, making
+it very easy to diagnose what incorrect step was taken and correct it. To keep the log of steps clear and concise, I
+decided not to place event calls inside potentially large loops. Had I logged events from within these loops, there
+could potentially be hundreds, thousands, or more logged steps that would make debugging difficult. In a sense, using
+EventSpec serves a similar function to unit testing: if you ever change your code in a way that causes it to skip a
+step or produce fatally incorrect output, the class will show you roughly where the changed code is, and therefore
+give you a good idea of what went wrong.
+
+#####################################
+########## Trace Output #1 ##########
+#####################################
+# Code run
+kc = Kwic(periodsToBreaks=True)
+kc.addText("This pair? is good.\n So is this pair and that pair")
+kc.index()
+kc.reset()
+kc.addText("This pair? is good.\n So is this pair and that pair")
+kc.listPairs()
+kc.print_eventspec_log()
+
+# Trace Output
+STEP #0: callConstructor --> idle
+STEP #1: callAddText --> idle
+STEP #2: callIndex --> processIndex
+STEP #3: callProcessIndex --> checkIfText
+STEP #4: callSplitPeriods --> splitIntoTuples
+STEP #5: callSplitAsTuples --> fillCircular
+STEP #6: callFillCircular --> checkIgnoreOrAlpha
+STEP #7: callAlphabetize --> idle
+STEP #8: callReset --> idle
+STEP #9: callAddText --> idle
+STEP #10: callListPairs --> processListPairs
+STEP #11: callProcessListPairs --> listPairsIndexOrList
+STEP #12: callProcessIndex --> checkIfText
+STEP #13: callSplitPeriods --> splitIntoTuples
+STEP #14: callSplitAsTuples --> fillCircular
+STEP #15: callFillCircular --> checkIgnoreOrAlpha
+STEP #16: callAlphabetize --> idle
+STEP #17: callCreateListPairs --> idle
+
+#####################################
+########## Trace Output #2 ##########
+#####################################
+# Code run
+kc = Kwic(ignoreWords=["and", "So"])
+kc.addText("This pair? is good.\n So is this pair and that pair")
+kc.listPairs()
+kc.print_eventspec_log()
+
+# Trace Output
+STEP #0: callConstructor --> idle
+STEP #1: callAddText --> idle
+STEP #2: callListPairs --> processListPairs
+STEP #3: callProcessListPairs --> listPairsIndexOrList
+STEP #4: callProcessIndex --> checkIfText
+STEP #5: processNewlineSplit --> splitIntoTuples
+STEP #6: callSplitAsTuples --> fillCircular
+STEP #7: callFillCircular --> checkIgnoreOrAlpha
+STEP #8: callRemoveWords --> removingWords
+STEP #9: callAlphabetize --> idle
+STEP #10: callCreateListPairs --> idle
--- a/3/eventspec.py
+++ b/3/eventspec.py
@@ -0,0 +1,66 @@
+
+class EventSpec():
+
+    def readfsm(self,file):
+    # takes a filename, returns a finite state machine
+    # fsm is (begin state, structure)
+        fsm = {}
+        with open(file) as f:
+            s = None
+            t = {}
+            for l in f:
+                ls = l.split()
+                if (ls == []) or (ls[0][0] == "#"):
+                    continue
+                if ls[0] == "state:":
+                    if s != None:
+                        if s in fsm:
+                            raise SyntaxError("Cannot define state " + s + " twice")
+                        fsm[s] = t
+                    s = ls[1]
+                    t = {}
+                elif ls[1] == "->":
+                    t[ls[0]] = ls[2]
+                elif ls[0] == "begin:":
+                    beginState = ls[1]
+                else:
+                    raise SyntaxError(l + " is not a line in a finite state machine definition")
+            if s != None:
+                fsm[s] = t
+            return (beginState, fsm)
+
+    
+    def __init__(self,file):
+        (self.start, self.machine) = self.readfsm(file)
+        self.state = self.start
+        self.trace = []
+        self.triggers = {}
+
+    def onEvent(self, event, action):
+        self.triggers[event] = action
+        
+    def reset(self):
+        self.state = self.start
+        self.trace = []
+
+    def trace(self):
+        return self.trace
+
+    def state(self):
+        return self.state
+
+    def printLog(self):
+        i = 0
+        for (e,s) in self.trace:
+            print "  STEP #"+str(i)+":",e,"-->",s
+            i += 1
+        
+    def event(self, event):
+        try:
+            self.state = self.machine[self.state][event]
+            self.trace.append((event,self.state))
+            if event in self.triggers:
+                self.triggers[event]()
+        except KeyError:
+            raise RuntimeError("From state " + self.state + ", transition " + event + " is not allowed (trace: " + str(self.trace) + ")")
+
--- a/Coursework/CS
+++ b/Coursework/CS
@@ -0,0 +1,39 @@
+begin: waitForStart
+
+state: waitForStart
+callConstructor -> idle
+
+state: idle
+callAddText -> idle
+callIndex -> processIndex
+callListPairs -> processListPairs
+callReset -> idle
+callCreateListPairs -> idle
+
+state: processIndex
+callProcessIndex -> checkIfText
+
+state: checkIfText
+callSplitPeriods -> splitIntoTuples
+processNewlineSplit -> splitIntoTuples
+processNoNewText -> idle
+
+state: splitIntoTuples
+callSplitAsTuples -> fillCircular
+
+state: fillCircular
+callFillCircular -> checkIgnoreOrAlpha
+
+state: checkIgnoreOrAlpha
+callAlphabetize -> idle
+callRemoveWords -> removingWords
+
+state: removingWords
+callAlphabetize -> idle
+
+state: processListPairs
+callProcessListPairs -> listPairsIndexOrList
+
+state: listPairsIndexOrList
+callProcessIndex -> checkIfText
+callCreateListPairs -> idle
--- a/Coursework/CS
+++ b/Coursework/CS
@@ -0,0 +1,270 @@
+# Created by Corwin Perren
+
+######################################################
+########## Begin Kwic Class Implementation ###########
+######################################################
+class Kwic(object):
+    def __init__(self, ignoreWords=None, periodsToBreaks=False):
+        self.ignore_words = ignoreWords
+        self.periods_to_breaks = periodsToBreaks
+
+        self.all_sentence_tuples = []
+
+        self.kwic_output_array = []
+        self.list_pairs_output_array = []
+
+        self.text_to_add = ""
+
+        self.event_spec_instance = EventSpec("kwic.fsm")
+        self.event_spec_instance.event("callConstructor")
+
+    def addText(self, new_text):
+        self.event_spec_instance.event("callAddText")
+        self.text_to_add += " " + str(new_text)
+
+    def index(self):
+        self.event_spec_instance.event("callIndex")
+        self.__process_kwic_index()
+        return self.kwic_output_array
+
+    def listPairs(self):
+        self.event_spec_instance.event("callListPairs")
+        self.__process_list_pairs()
+        return self.list_pairs_output_array
+
+    def reset(self):
+        self.event_spec_instance.event("callReset")
+        self.all_sentence_tuples = []
+
+        self.kwic_output_array = []
+        self.list_pairs_output_array = []
+
+        self.text_to_add = []
+
+    def print_eventspec_log(self):
+        self.event_spec_instance.printLog()
+
+    def __process_kwic_index(self):
+        self.event_spec_instance.event("callProcessIndex")
+        if self.text_to_add:
+            if self.periods_to_breaks:
+                split_into_sentences = self.__split_by_periods(self.text_to_add)
+            else:
+                self.event_spec_instance.event("processNewlineSplit")
+                split_into_sentences = self.text_to_add.split('\n')
+
+            split_into_word_tuples = self.__split_by_word_as_tuples(split_into_sentences)
+
+            for sentence_tuple in split_into_word_tuples:
+                self.all_sentence_tuples.append(sentence_tuple)
+
+            circular_shifted_data = self.__fill_with_circular_shifts_and_original(split_into_word_tuples)
+
+            if self.ignore_words:
+                circular_shifted_data = self.__remove_words(circular_shifted_data, self.ignore_words)
+
+            for current_tuple in circular_shifted_data:
+                self.kwic_output_array.append(current_tuple)
+
+            self.text_to_add = ""
+
+            self.kwic_output_array = self.__alphabetize_tuple_list(self.kwic_output_array)
+        else:
+            self.event_spec_instance.event("processNoNewText")
+
+    def __process_list_pairs(self):
+        self.event_spec_instance.event("callProcessListPairs")
+        if self.text_to_add:
+            self.__process_kwic_index()
+
+        self.list_pairs_output_array = self.__create_list_pairs(self.all_sentence_tuples)
+
+    def __fill_with_circular_shifts_and_original(self, sentence_array):
+        self.event_spec_instance.event("callFillCircular")
+        output_array = []
+
+        for current_tuple in sentence_array:
+            for index, _ in enumerate(current_tuple[0]):
+                output_array.append((self.__array_circular_shift(current_tuple[0], index), current_tuple[1]))
+
+        return output_array
+
+    def __alphabetize_tuple_list(self, input_array):
+        self.event_spec_instance.event("callAlphabetize")
+        sorted_array = sorted(input_array, key=self.__alphabetized_key)
+        return sorted_array
+
+    def __create_list_pairs(self, input_array):
+        self.event_spec_instance.event("callCreateListPairs")
+        known_pairs = {}
+
+        for sentence_array, _ in input_array:
+            seen_in_sentence = set([])
+
+            for first_word in sentence_array:
+                for second_word in sentence_array:
+                    first, second = self.__return_ordered_words(self.__sanitize_word(first_word),
+                                                                self.__sanitize_word(second_word))
+
+                    if (first == second) or (first == ""):
+                        continue
+
+                    if (first, second) not in seen_in_sentence:
+                        seen_in_sentence.add((first, second))
+
+                        if (first, second) in known_pairs:
+                            known_pairs[(first, second)] += 1
+                        else:
+                            known_pairs[(first, second)] = 1
+
+        output_list = []
+
+        for key in known_pairs:
+            if known_pairs[key] > 1:
+                output_list.append((key, known_pairs[key]))
+
+        output_list.sort(key=self.__alphabetized_key)
+
+        return output_list
+
+    def __split_by_periods(self, document):
+        self.event_spec_instance.event("callSplitPeriods")
+        output_array = []
+        temp_sentence = ""
+        document_length_zero_indexed = len(document) - 1
+        for current_index, current_value in enumerate(document):
+            if current_value == '.':
+                if (current_index == 0) or (current_index == document_length_zero_indexed) or \
+                        (document[current_index - 1].islower() and (document[current_index + 1].isspace() or
+                                                                        (document[current_index + 1] == '\n'))):
+                    temp_sentence += current_value
+                    output_array.append(temp_sentence)
+                    temp_sentence = ""
+            else:
+                if current_value != '\n':
+                    temp_sentence += current_value
+                else:
+                    temp_sentence += " "
+
+        if temp_sentence:
+            output_array.append(temp_sentence)
+        return output_array
+
+    def __split_by_word_as_tuples(self, sentence_array):
+        self.event_spec_instance.event("callSplitAsTuples")
+
+        output_array = []
+        index_incrementer = 0
+
+        for sentence in sentence_array:
+            words_array = sentence.split(" ")
+            words_array = filter(None, words_array)
+            output_array.append((words_array, index_incrementer))
+            index_incrementer += 1
+
+        return output_array
+
+    def __array_circular_shift(self, input_array, rotate_val):
+        output_array = input_array[rotate_val:] + input_array[:rotate_val]
+        return output_array
+
+    def __alphabetized_key(self, input_data):
+        output_array = []
+        for word in input_data[0]:
+            output_array.append(word.lower())
+        return output_array
+
+    def __remove_words(self, input_array, words):
+        self.event_spec_instance.event("callRemoveWords")
+
+        lowered_input = []
+        output_array = []
+
+        for word in words:
+            lowered_input.append(word.lower())
+
+        for current_tuple in input_array:
+            if current_tuple[0][0].lower().strip(".:!?,") in lowered_input:
+                pass
+            else:
+                output_array.append(current_tuple)
+
+        return output_array
+
+    def __sanitize_word(self, input_word):
+        return input_word.lower().translate(None, ".,?!:")
+
+    def __return_ordered_words(self, word_one, word_two):
+
+        if word_one < word_two:
+            return word_one, word_two
+        else:
+            return word_two, word_one
+
+
+######################################################
+########## Begin Provided EventSpec Include ##########
+######################################################
+class EventSpec():
+    def readfsm(self, file):
+        # takes a filename, returns a finite state machine
+        # fsm is (begin state, structure)
+        fsm = {}
+        with open(file) as f:
+            s = None
+            t = {}
+            for l in f:
+                ls = l.split()
+                if (ls == []) or (ls[0][0] == "#"):
+                    continue
+                if ls[0] == "state:":
+                    if s != None:
+                        if s in fsm:
+                            raise SyntaxError("Cannot define state " + s + " twice")
+                        fsm[s] = t
+                    s = ls[1]
+                    t = {}
+                elif ls[1] == "->":
+                    t[ls[0]] = ls[2]
+                elif ls[0] == "begin:":
+                    beginState = ls[1]
+                else:
+                    raise SyntaxError(l + " is not a line in a finite state machine definition")
+            if s != None:
+                fsm[s] = t
+            return (beginState, fsm)
+
+    def __init__(self, file):
+        (self.start, self.machine) = self.readfsm(file)
+        self.state = self.start
+        self.trace = []
+        self.triggers = {}
+
+    def onEvent(self, event, action):
+        self.triggers[event] = action
+
+    def reset(self):
+        self.state = self.start
+        self.trace = []
+
+    def trace(self):
+        return self.trace
+
+    def state(self):
+        return self.state
+
+    def printLog(self):
+        i = 0
+        for (e, s) in self.trace:
+            print "  STEP #" + str(i) + ":", e, "-->", s
+            i += 1
+
+    def event(self, event):
+        try:
+            self.state = self.machine[self.state][event]
+            self.trace.append((event, self.state))
+            if event in self.triggers:
+                self.triggers[event]()
+        except KeyError:
+            raise RuntimeError("From state " + self.state + ", transition " + event + " is not allowed (trace: " + str(
+                self.trace) + ")")
--- a/3/machine.fsm
+++ b/3/machine.fsm
@@ -0,0 +1,13 @@
+begin: start
+
+state: start
+a -> infinibs
+b -> infinias
+
+state: infinibs
+b -> infinibs
+a -> start
+
+state: infinias
+a -> infinias
+b -> start
--- a/Coursework/CS
+++ b/Coursework/CS
@@ -0,0 +1,12 @@
+# For testing implementation
+from kwic import Kwic
+
+output_check = [(['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),   (['good.', 'This', 'pair?', 'is'], 0),   (['is', 'good.', 'This', 'pair?'], 0),   (['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),   (['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),   (['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),   (['pair?', 'is', 'good.', 'This'], 0),   (['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),   (['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),   (['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),   (['This', 'pair?', 'is', 'good.'], 0)]
+output_check_pairs = [(('is', 'pair'), 2), (('is', 'this'), 2), (('pair', 'this'), 2)]
+
+if __name__ == "__main__":
+    kc = Kwic()
+    kc.addText("This pair? is good.\n So is this pair and that pair")
+    print kc.listPairs() == output_check_pairs
+    print kc.index() == output_check
+    kc.print_eventspec_log()
--- a/3/testeventspec.py
+++ b/3/testeventspec.py
@@ -0,0 +1,29 @@
+from assign3 import eventspec
+
+es = eventspec.EventSpec("kwic.fsm")
+
+es.event("a")
+es.event("b")
+es.event("b")
+es.event("b")
+es.event("b")
+es.event("a")
+es.printLog()
+
+# Beatles code
+
+def printHello():
+    print "Hello"
+def printGoodbye():
+    print "Goodbye"
+    
+es.onEvent("a",printHello)
+es.onEvent("b",printGoodbye)
+
+es.event("a")
+es.event("a")
+es.event("a")
+es.event("b")
+es.event("b")
+es.event("a")
+es.printLog()