Added work from my other class repositories before deletion

This commit is contained in:
2017-11-29 10:28:24 -08:00
parent cb0b5f4d25
commit 5ea24c81b5
198 changed files with 739603 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
def split_by_periods(document):
    """Split text into sentences, ending each one at a period.

    Newline characters are removed (not replaced by spaces) before
    splitting. Every period closes a sentence and stays attached to it, and
    any whitespace that follows a period is kept at the start of the next
    sentence. Text after the last period is returned as a final sentence
    when it is non-empty.

    Args:
        document: The text to split.

    Returns:
        A list of sentence strings in document order; an empty list when
        the text is empty.
    """
    # Let str.replace/str.split scan the text instead of growing a string
    # one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last one was terminated by a period, which
    # split() consumed; put it back.
    sentences = [piece + "." for piece in pieces[:-1]]
    # The last piece is whatever followed the final period (possibly nothing).
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Pair the words of each sentence with the sentence's position.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples, one per sentence in input
        order. ``words`` is the list of non-empty pieces obtained by
        splitting the sentence on single spaces, and ``index`` counts
        sentences starting from 0.
    """
    # Build a real list for the words: on Python 3 filter() returns a lazy
    # iterator, which never compares equal to a list and can only be
    # consumed once.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Break a document into sentences and list the words of each one.

    Args:
        document: Text to process. A falsy value (for example ``""`` or
            ``None``) produces an empty list.
        listPairs: Accepted but not used by this function.
        ignoreWords: Accepted but not used by this function.
        periodsToBreaks: When true, sentences come from
            ``split_by_periods``; otherwise every line of the document
            (as given by ``str.splitlines``) is treated as a sentence.

    Returns:
        The result of ``split_by_word_as_tuples`` applied to the sentences,
        or ``[]`` when ``document`` is falsy.
    """
    if not document:
        return []

    sentences = (
        split_by_periods(document) if periodsToBreaks else document.splitlines()
    )
    return split_by_word_as_tuples(sentences)

View File

@@ -0,0 +1,33 @@
from kwic import kwic
# Input documents fed to kwic().
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Expected result for hello_buddy_periods with line-based splitting: the text
# has no newline, so the whole document is a single sentence with index 0.
hello_buddy_periods_output = [
    (
        [
            "Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and",
            "goodbye,", "buddy.", "Hello", "is", "like", "buddy", "Goodbye!",
        ],
        0,
    ),
]

# Expected result for hello_buddy_periods when periods end sentences.
hello_buddy_word_tuples_output = [
    (["Hello", "there."], 0),
    (["Hello", "there,", "buddy."], 1),
    (["Hello", "and", "goodbye,", "buddy."], 2),
    (["Hello", "is", "like", "buddy", "Goodbye!"], 3),
]

if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic(empty_document) == []
    # Ensure real input does not produce empty output
    assert kwic(design_words_doc) != []
    # Make sure it's broken into two lines
    assert len(kwic(design_words_doc)) == 2
    # Make sure it's broken into four lines
    assert len(kwic(goodbye_buddy_doc)) == 4
    # Make sure kwic output matches the expected output exactly. A check of
    # the form any(x in actual ...) would pass as soon as a single expected
    # tuple was present, so compare the whole lists instead.
    assert kwic(hello_buddy_periods) == hello_buddy_periods_output
    # Same exact comparison when periods are used as sentence breaks
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_word_tuples_output