mirror of
https://github.com/caperren/school_archives.git
synced 2025-11-09 21:51:15 +00:00
Added work from my other class repositories before deletion
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
def split_by_periods(document):
    """Break *document* into sentences, treating each period as a terminator.

    Newline characters are discarded, every period stays attached to the
    sentence it ends, and any text left over after the last period is
    returned as a final, unterminated sentence. Spaces are left untouched,
    so a sentence may begin with the space that followed the previous period.
    """
    # Removing the newlines up front lets str.split do the scanning.
    fragments = document.replace("\n", "").split(".")

    # str.split swallowed the periods; put one back on every fragment that
    # was actually followed by one (all but the last).
    sentences = [fragment + "." for fragment in fragments[:-1]]

    trailing_text = fragments[-1]
    if trailing_text:
        sentences.append(trailing_text)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Split every sentence into words and tag it with its position.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list with one ``(words, index)`` tuple per sentence. ``words`` is
        the list of space-separated words, with the empty strings produced
        by leading, trailing or repeated spaces dropped; ``index`` is the
        zero-based position of the sentence in ``sentence_array``.
    """
    # Build a real list of words. On Python 3 ``filter()`` returns a lazy
    # iterator, which cannot be sliced, iterated twice, or compared to a
    # list -- all of which the rest of the module relies on.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Return a copy of *input_array* rotated left by *rotate_val* places.

    Args:
        input_array: Sliceable sequence (list, tuple, ...) to rotate.
        rotate_val: Number of positions to rotate. Negative values rotate
            to the right; values whose magnitude exceeds the length wrap
            around instead of being ignored.

    Returns:
        A new sequence of the same type; the input is not modified.
    """
    if not input_array:
        # Nothing to rotate, and the modulo below would divide by zero.
        return input_array[:]

    offset = rotate_val % len(input_array)
    return input_array[offset:] + input_array[:offset]
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Each entry of *sentence_array* is a ``(words, index)`` tuple. For a
    sentence of n words the result holds n entries: the words rotated left
    by 0 (the original order), 1, ..., n - 1 places, each paired with the
    same index. Entries appear in sentence order, then by shift amount.
    """
    shifted_entries = []

    for entry in sentence_array:
        words, sentence_index = entry[0], entry[1]
        shifted_entries.extend(
            (array_circular_shift(words, offset), sentence_index)
            for offset in range(len(words))
        )

    return shifted_entries
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Produce every circular shift of every sentence in *document*.

    Args:
        document: Text to index. A falsy value yields ``[]``.
        listPairs: When true, the shifts are returned as the first element
            of a 2-tuple whose second element is an empty list.
        ignoreWords: Accepted but not used by this implementation.
        periodsToBreaks: When true, sentences end at periods and newlines
            are discarded; otherwise every line is one sentence.

    Returns:
        A list of ``(shifted_words, sentence_index)`` tuples, or
        ``(that_list, [])`` when *listPairs* is true.
    """
    if not document:
        return []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    shifts = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))

    if listPairs:
        return shifts, []
    return shifts
|
||||
@@ -0,0 +1,31 @@
|
||||
1. How did you decide what to test first? Would your final code change significantly if you changed the order of tests?
|
||||
|
||||
The process for deciding what to test mostly had to do with a logical breakdown of the core parts of the code so that
|
||||
things that relied on others to work were done after those prerequisites. So, for example, before we could do anything else,
|
||||
first the document had to be broken down into sentences, then words. In that part, you could also change whether or not
|
||||
the sentence breaks were handled with newlines or periods. Afterwards, you could then deal with rearranging the words as
|
||||
necessary, determining pairs, and then excluding any words that were given as arguments. I don't feel like the
|
||||
code would have been massively different if it'd been done in reverse, but it definitely would have been more difficult
|
||||
to write. On top of that, you'd still have to think all the way through how the code would need to function, regardless
|
||||
of the order in which you wrote it, in order to know what needed to be written in the first place. Also, if you
|
||||
literally reversed the order of the tests, it could lead to you skipping the whole point of using tests (if you did it
|
||||
incorrectly). Again, for an example, if you wrote a test that produced the final output of the program as the initial
|
||||
input and expected a correct output, you'd either have to hard-code many, many values into the resulting code while you
|
||||
implemented them for real, or you'd end up writing all necessary code at once, which would defeat the purpose.
|
||||
|
||||
2. What did you think of test driven development, for this problem? What are the strengths and weaknesses of the
|
||||
approach? Does it encourage/discourage certain kinds of program designs?
|
||||
|
||||
I'm torn about it. I feel that for this particular problem, it may have been overkill and actually hampered design
|
||||
progress at times. The main issue was that for each major step, the output of the code often changed enough that many
|
||||
of the tests had to be modified or rewritten to match. The definite strengths of this approach are that you can easily
|
||||
tell when you change something that breaks the functionality of your code, and it will immediately produce an exception.
|
||||
On the flip side, it does take much longer to write functional code, depending on what exactly you're writing. I would
|
||||
argue that the larger the application being written, the more important it would be to do this kind of testing.
|
||||
Without it, you might be digging through hundreds of thousands to millions of lines of code to figure out what is
|
||||
breaking. I can definitely appreciate the fact that taking the time to write the tests helps guarantee that it's doing
|
||||
what you want, but I feel like it would be more useful in other programming contexts, like the one just mentioned.
|
||||
I'd say this style of coding encourages program designs that have a consistent output even if the underlying code
|
||||
changes often. For example, I feel like it'd be ideal for developing an API where once a standard has been created,
|
||||
that API should function identically for a long time, even if the backend is changing constantly. In general, this way
|
||||
of programming also encourages full coverage of edge cases, as you'll easily and quickly be able to test for them.
|
||||
@@ -0,0 +1,2 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Placeholder KWIC entry point that always returns an empty list.

    All arguments are currently ignored. ``ignoreWords`` defaults to
    ``None`` rather than a mutable ``[]`` so that no list object is shared
    between calls once the parameter starts being used.
    """
    return []
|
||||
@@ -0,0 +1,5 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Return ``[document]``, or ``[]`` when *document* is falsy.

    ``listPairs`` and ``ignoreWords`` are accepted but not used yet.
    ``ignoreWords`` defaults to ``None`` rather than a mutable ``[]`` so
    that no list object is shared between calls.
    """
    return [document] if document else []
|
||||
@@ -0,0 +1,7 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return the lines of *document* wrapped in a one-element list.

    A falsy *document* yields ``[]``. ``listPairs``, ``ignoreWords`` and
    ``periodsToBreaks`` are accepted but not used by this version.
    """
    return [document.splitlines()] if document else []
|
||||
@@ -0,0 +1,10 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into sentences and wrap them in a one-element list.

    With *periodsToBreaks* the text is split on every period (the periods
    themselves are dropped and newlines are kept); otherwise it is split
    into lines. A falsy *document* yields ``[]``. ``listPairs`` and
    ``ignoreWords`` are accepted but not used by this version.
    """
    if not document:
        return []

    sentences = document.split(".") if periodsToBreaks else document.splitlines()
    return [sentences]
|
||||
@@ -0,0 +1,29 @@
|
||||
def split_by_periods(document):
    """Break *document* into sentences, treating each period as a terminator.

    Newline characters are discarded, every period stays attached to the
    sentence it ends, and any text left over after the last period is
    returned as a final, unterminated sentence. Spaces are left untouched,
    so a sentence may begin with the space that followed the previous period.
    """
    # Removing the newlines up front lets str.split do the scanning.
    fragments = document.replace("\n", "").split(".")

    # str.split swallowed the periods; put one back on every fragment that
    # was actually followed by one (all but the last).
    sentences = [fragment + "." for fragment in fragments[:-1]]

    trailing_text = fragments[-1]
    if trailing_text:
        sentences.append(trailing_text)

    return sentences
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return the sentences of *document* as a flat list of strings.

    With *periodsToBreaks* sentences end at periods (newlines discarded,
    periods kept); otherwise every line is one sentence. A falsy
    *document* yields ``[]``. ``listPairs`` and ``ignoreWords`` are
    accepted but not used by this version.
    """
    if not document:
        return []

    return split_by_periods(document) if periodsToBreaks else document.splitlines()
|
||||
@@ -0,0 +1,44 @@
|
||||
def split_by_periods(document):
    """Break *document* into sentences, treating each period as a terminator.

    Newline characters are discarded, every period stays attached to the
    sentence it ends, and any text left over after the last period is
    returned as a final, unterminated sentence. Spaces are left untouched,
    so a sentence may begin with the space that followed the previous period.
    """
    # Removing the newlines up front lets str.split do the scanning.
    fragments = document.replace("\n", "").split(".")

    # str.split swallowed the periods; put one back on every fragment that
    # was actually followed by one (all but the last).
    sentences = [fragment + "." for fragment in fragments[:-1]]

    trailing_text = fragments[-1]
    if trailing_text:
        sentences.append(trailing_text)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Split every sentence into words and tag it with its position.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list with one ``(words, index)`` tuple per sentence. ``words`` is
        the list of space-separated words, with the empty strings produced
        by leading, trailing or repeated spaces dropped; ``index`` is the
        zero-based position of the sentence in ``sentence_array``.
    """
    # Build a real list of words. On Python 3 ``filter()`` returns a lazy
    # iterator, which never compares equal to the word lists callers expect.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into sentences, then each sentence into words.

    With *periodsToBreaks* sentences end at periods (newlines discarded);
    otherwise every line is one sentence. Returns the ``(words, index)``
    tuples built by ``split_by_word_as_tuples``, or ``[]`` for a falsy
    *document*. ``listPairs`` and ``ignoreWords`` are accepted but not
    used by this version.
    """
    if not document:
        return []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    return split_by_word_as_tuples(sentences)
|
||||
@@ -0,0 +1,61 @@
|
||||
def split_by_periods(document):
    """Break *document* into sentences, treating each period as a terminator.

    Newline characters are discarded, every period stays attached to the
    sentence it ends, and any text left over after the last period is
    returned as a final, unterminated sentence. Spaces are left untouched,
    so a sentence may begin with the space that followed the previous period.
    """
    # Removing the newlines up front lets str.split do the scanning.
    fragments = document.replace("\n", "").split(".")

    # str.split swallowed the periods; put one back on every fragment that
    # was actually followed by one (all but the last).
    sentences = [fragment + "." for fragment in fragments[:-1]]

    trailing_text = fragments[-1]
    if trailing_text:
        sentences.append(trailing_text)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Split every sentence into words and tag it with its position.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list with one ``(words, index)`` tuple per sentence. ``words`` is
        the list of space-separated words, with the empty strings produced
        by leading, trailing or repeated spaces dropped; ``index`` is the
        zero-based position of the sentence in ``sentence_array``.
    """
    # Build a real list of words. On Python 3 ``filter()`` returns a lazy
    # iterator, which cannot be sliced, iterated twice, or compared to a
    # list -- all of which the rest of the module relies on.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Return a copy of *input_array* rotated left by *rotate_val* places.

    Args:
        input_array: Sliceable sequence (list, tuple, ...) to rotate.
        rotate_val: Number of positions to rotate. Negative values rotate
            to the right; values whose magnitude exceeds the length wrap
            around instead of being ignored.

    Returns:
        A new sequence of the same type; the input is not modified.
    """
    if not input_array:
        # Nothing to rotate, and the modulo below would divide by zero.
        return input_array[:]

    offset = rotate_val % len(input_array)
    return input_array[offset:] + input_array[:offset]
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Each entry of *sentence_array* is a ``(words, index)`` tuple. For a
    sentence of n words the result holds n entries: the words rotated left
    by 0 (the original order), 1, ..., n - 1 places, each paired with the
    same index. Entries appear in sentence order, then by shift amount.
    """
    shifted_entries = []

    for entry in sentence_array:
        words, sentence_index = entry[0], entry[1]
        shifted_entries.extend(
            (array_circular_shift(words, offset), sentence_index)
            for offset in range(len(words))
        )

    return shifted_entries
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return ``(shifts, [])`` for *document*.

    ``shifts`` is the list of ``(shifted_words, sentence_index)`` tuples
    covering every circular shift of every sentence. With
    *periodsToBreaks* sentences end at periods (newlines discarded);
    otherwise every line is one sentence. A falsy *document* yields
    ``([], [])``. ``listPairs`` and ``ignoreWords`` are accepted but not
    used by this version; the second tuple element is always empty.
    """
    if not document:
        return [], []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    word_tuples = split_by_word_as_tuples(sentences)

    return fill_with_circular_shifts_and_original(word_tuples), []
|
||||
Binary file not shown.
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of the two sentences in the period-split sample text,
# each tagged with the index of the sentence it came from.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1),
]

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic.kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic.kwic(design_words_doc) != []

    # Two lines of three words each give six circular shifts.
    assert len(kwic.kwic(design_words_doc)) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts.
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Individual helpers can be checked too. Splitting by periods drops the
    # newlines but keeps the spaces on either side of them, hence the double
    # space inside the second sentence.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.",
        " Not  newlines.",
    ]

    # A sentence is split into words properly; stray spaces yield no words.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]

    # The circular shift function rotates properly.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five",
    ]
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two",
    ]

    # Circularly shifted versions of all sentences are correct.
    assert kwic.fill_with_circular_shifts_and_original([
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]) == this_is_split_periods_circular_output

    # End-to-end sanity check: the circular shift works and the output is
    # formatted correctly.
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == this_is_split_periods_circular_output
|
||||
@@ -0,0 +1,6 @@
|
||||
from kwic import kwic
|
||||
|
||||
document = ""  # Deliberately empty: there is no data to index

if __name__ == "__main__":
    # With no input data the result must be empty as well.
    assert kwic(document) == []
|
||||
@@ -0,0 +1,8 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic(design_words_doc) != []
|
||||
@@ -0,0 +1,12 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic(design_words_doc) != []

    # The first element of the result holds the lines of the document:
    # two for the first sample, four for the second.
    assert len(kwic(design_words_doc)[0]) == 2
    assert len(kwic(goodbye_buddy_doc)[0]) == 4
|
||||
@@ -0,0 +1,25 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic(design_words_doc) != []

    # The first sample is broken into two lines.
    assert len(kwic(design_words_doc)[0]) == 2

    # The second sample is broken into four lines.
    assert len(kwic(goodbye_buddy_doc)[0]) == 4

    # Text containing periods but no newlines is a single line by default.
    assert len(kwic(hello_buddy_periods)[0]) == 1

    # The same text becomes four pieces once periods act as breaks.
    assert len(kwic(hello_buddy_periods, periodsToBreaks=True)[0]) == 4
|
||||
@@ -0,0 +1,29 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Expected sentences when periods act as breaks: each keeps its ending
# period and the space that followed the previous one.
hello_buddy_periods_output = [
    "Hello there.",
    " Hello there, buddy.",
    " Hello and goodbye, buddy.",
    " Hello is like buddy Goodbye!",
]

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic(design_words_doc) != []

    # The first sample is broken into two lines.
    assert len(kwic(design_words_doc)) == 2

    # The second sample is broken into four lines.
    assert len(kwic(goodbye_buddy_doc)) == 4

    # Text containing periods but no newlines comes back as itself.
    assert kwic(hello_buddy_periods)[0] == hello_buddy_periods

    # With periods as breaks it becomes four sentences, and this time each
    # keeps its ending period like it is supposed to.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_periods_output
|
||||
@@ -0,0 +1,33 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Expected result when the whole text is treated as one line.
hello_buddy_periods_output = [
    (["Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and", "goodbye,", "buddy.",
      "Hello", "is", "like", "buddy", "Goodbye!"], 0),
]

# Expected result when periods act as sentence breaks.
hello_buddy_word_tuples_output = [
    (["Hello", "there."], 0),
    (["Hello", "there,", "buddy."], 1),
    (["Hello", "and", "goodbye,", "buddy."], 2),
    (["Hello", "is", "like", "buddy", "Goodbye!"], 3),
]

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic(design_words_doc) != []

    # The first sample is broken into two lines.
    assert len(kwic(design_words_doc)) == 2

    # The second sample is broken into four lines.
    assert len(kwic(goodbye_buddy_doc)) == 4

    # Every expected element must be present in the kwic output. ``all`` is
    # required here: ``any`` would pass as soon as one element matched.
    by_lines = kwic(hello_buddy_periods)
    assert all(expected in by_lines for expected in hello_buddy_periods_output)

    # Same check with periods used as sentence breaks.
    by_periods = kwic(hello_buddy_periods, periodsToBreaks=True)
    assert all(expected in by_periods for expected in hello_buddy_word_tuples_output)
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of the two sentences in the period-split sample text,
# each tagged with the index of the sentence it came from.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1),
]

if __name__ == "__main__":
    # Empty input gives a pair of empty lists.
    assert kwic.kwic(empty_document) == ([], [])

    # Real input does not produce the empty pair.
    assert kwic.kwic(design_words_doc) != ([], [])

    # Two lines of three words each give six circular shifts.
    assert len(kwic.kwic(design_words_doc)[0]) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts.
    assert len(kwic.kwic(goodbye_buddy_doc)[0]) == 14

    # Individual helpers can be checked too. Splitting by periods drops the
    # newlines but keeps the spaces on either side of them, hence the double
    # space inside the second sentence.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.",
        " Not  newlines.",
    ]

    # A sentence is split into words properly; stray spaces yield no words.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]

    # The circular shift function rotates properly.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five",
    ]
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two",
    ]

    # Circularly shifted versions of all sentences are correct.
    assert kwic.fill_with_circular_shifts_and_original([
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]) == this_is_split_periods_circular_output

    # End-to-end sanity check: the circular shift works and the output is
    # formatted correctly (shifts first, then an empty list).
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == (this_is_split_periods_circular_output, [])
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of the two sentences in the period-split sample text,
# each tagged with the index of the sentence it came from.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1),
]

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic.kwic(empty_document) == []

    # Real input does not produce empty output.
    assert kwic.kwic(design_words_doc) != []

    # Two lines of three words each give six circular shifts.
    assert len(kwic.kwic(design_words_doc)) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts.
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Individual helpers can be checked too. Splitting by periods drops the
    # newlines but keeps the spaces on either side of them, hence the double
    # space inside the second sentence.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.",
        " Not  newlines.",
    ]

    # A sentence is split into words properly; stray spaces yield no words.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]

    # The circular shift function rotates properly.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five",
    ]
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two",
    ]

    # Circularly shifted versions of all sentences are correct.
    assert kwic.fill_with_circular_shifts_and_original([
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
        (['Not', 'newlines.'], 1),
    ]) == this_is_split_periods_circular_output

    # End-to-end sanity check: the circular shift works and the output is
    # formatted correctly.
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == this_is_split_periods_circular_output
|
||||
Reference in New Issue
Block a user