mirror of
https://github.com/caperren/school_archives.git
synced 2025-11-09 21:51:15 +00:00
Added work from my other class repositories before deletion
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
def split_by_periods(document):
    """Split *document* into sentences that each end at a period.

    Newline characters are dropped entirely (they are not replaced by a
    space), so text on either side of a line break is joined directly.
    Every returned sentence keeps its terminating "."; any text after the
    last period is returned as a final, unterminated sentence.  Leading
    whitespace inside a sentence is preserved.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings, in document order.  Empty input (or
        input consisting only of newlines) yields an empty list.
    """
    # Strip the line breaks once up front instead of testing every
    # character and growing a string one character at a time.
    flattened = document.replace("\n", "")

    # str.split always returns at least one piece; the last piece is
    # whatever followed the final period (possibly the empty string).
    pieces = flattened.split(".")
    sentences = [piece + "." for piece in pieces[:-1]]

    trailing_text = pieces[-1]
    if trailing_text:
        sentences.append(trailing_text)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Sentences are split on the space character only; empty strings
    produced by leading, trailing, or repeated spaces are discarded.

    Args:
        sentence_array: A sequence of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples, one per input sentence, where
        ``words`` is a list of the non-empty words and ``index`` is the
        zero-based position of the sentence in ``sentence_array``.  A
        sentence with no words still produces an entry with an empty list.
    """
    # Build a real list for the words: on Python 3 ``filter`` returns a
    # one-shot iterator, which can be neither sliced nor compared to a list.
    return [
        ([word for word in sentence.split(" ") if word], sentence_index)
        for sentence_index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Return a copy of *input_array* rotated left by *rotate_val* places.

    The element at index ``rotate_val`` becomes the first element and the
    elements before it wrap around to the end.  The rotation amount is
    taken modulo the length, so values larger than the length keep
    rotating and negative values rotate to the right.

    Args:
        input_array: A sliceable sequence that supports ``+`` (e.g. a list).
        rotate_val: Number of positions to rotate by; any integer.

    Returns:
        A new sequence of the same type; ``input_array`` is not modified.
    """
    length = len(input_array)
    if length == 0:
        # Nothing to rotate, and it avoids a modulo-by-zero below.
        return input_array[:]

    # Without normalising, an out-of-range offset makes both slices clamp
    # and the input would come back unrotated.
    offset = rotate_val % length
    return input_array[offset:] + input_array[:offset]
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular word rotations.

    Args:
        sentence_array: A sequence of entries whose first item is the
            sentence's word list and whose second item is the sentence
            index.

    Returns:
        A flat list of ``(rotated_words, index)`` tuples.  For each entry
        there is one rotation per word, starting with the unrotated
        original (offset 0) and followed by offsets 1, 2, ... in order.
        An entry with no words contributes nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _ in enumerate(entry[0])
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circularly shifted word lists for every sentence in *document*.

    Args:
        document: The text to index.  A falsy value (e.g. an empty string)
            short-circuits to an empty list regardless of the other flags.
        listPairs: When true, the result is a 2-tuple of the shift list and
            a second list, which this implementation always leaves empty.
        ignoreWords: Accepted for interface compatibility; it is not
            consulted anywhere in this function.
        periodsToBreaks: When true, sentences are delimited by periods via
            ``split_by_periods``; otherwise each newline-separated line is
            one sentence.

    Returns:
        A list of ``(rotated_words, sentence_index)`` tuples, or
        ``(that_list, [])`` when ``listPairs`` is true.
    """
    if not document:
        return []

    sentences = split_by_periods(document) if periodsToBreaks else document.split('\n')
    indexed_words = split_by_word_as_tuples(sentences)
    all_shifts = fill_with_circular_shifts_and_original(indexed_words)

    return (all_shifts, []) if listPairs else all_shifts
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
# --- Input documents -------------------------------------------------------

# Degenerate input: no text at all.
empty_document = ""

# Two newline-separated lines of three words each.
design_words_doc = "Design is hard.\nLet's just implement."

# Four newline-separated lines containing 2, 3, 4 and 5 words respectively.
goodbye_buddy_doc = (
    "Hello there.\n"
    "Hello there, buddy.\n"
    "Hello and goodbye, buddy.\n"
    "Hello is like buddy Goodbye!"
)

# The same sentences as above, separated by spaces instead of newlines.
hello_buddy_periods = (
    "Hello there. "
    "Hello there, buddy. "
    "Hello and goodbye, buddy. "
    "Hello is like buddy Goodbye!"
)

# --- Expected output -------------------------------------------------------

# Every circular shift of the two sentences "This is something split by
# periods." (index 0) and "Not newlines." (index 1), in rotation order.
this_is_split_periods_circular_output = [
    (["This", "is", "something", "split", "by", "periods."], 0),
    (["is", "something", "split", "by", "periods.", "This"], 0),
    (["something", "split", "by", "periods.", "This", "is"], 0),
    (["split", "by", "periods.", "This", "is", "something"], 0),
    (["by", "periods.", "This", "is", "something", "split"], 0),
    (["periods.", "This", "is", "something", "split", "by"], 0),
    (["Not", "newlines."], 1),
    (["newlines.", "Not"], 1),
]
|
||||
|
||||
if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic.kwic(empty_document) == []

    # Ensure real input does not produce empty output
    assert kwic.kwic(design_words_doc) != []

    # Two lines of three words each: one shift per word gives six entries
    assert len(kwic.kwic(design_words_doc)) == 6

    # Four lines of 2, 3, 4 and 5 words: 14 shifts in total
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Individual helpers can be checked directly; this one covers splitting
    # by periods.  Newlines are removed without inserting a space, so
    # "Not \n newlines." collapses to "Not  newlines." with TWO spaces.
    assert (kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
            ["This is something split by periods.", " Not  newlines."])

    # This checks to make sure a sentence gets split into words properly
    assert (kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
            [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])

    # These check to make sure that the circular shift function is rotating properly
    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
            ["One", "Two", "Three", "Four", "Five"])

    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
            ["Three", "Four", "Five", "One", "Two"])

    # This checks to make sure that circularly shifted versions of all sentences are correct
    assert (kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
                                                         (['Not', 'newlines.'], 1)]) ==
            this_is_split_periods_circular_output)

    # End-to-end sanity check: period splitting, word splitting and circular
    # shifting together must produce the expected, correctly formatted output.
    assert (kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
            this_is_split_periods_circular_output)
|
||||
Reference in New Issue
Block a user