Added work from my other class repositories before deletion

This commit is contained in:
2017-11-29 10:28:24 -08:00
parent cb0b5f4d25
commit 5ea24c81b5
198 changed files with 739603 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
def kwic(document, listPairs=False, ignoreWords=None):
    """Return the keyword-in-context index for ``document``.

    Placeholder implementation: every input produces an empty index.

    Args:
        document: Text to index; not inspected yet.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
            Defaults to ``None`` instead of a shared mutable list.

    Returns:
        An empty list.
    """
    return []

View File

@@ -0,0 +1,6 @@
from kwic import kwic
# Fixture: a document that contains no text at all.
document = ""

if __name__ == "__main__":
    # With nothing to index, kwic must hand back an empty list.
    assert kwic(document) == []

View File

@@ -0,0 +1,5 @@
def kwic(document, listPairs=False, ignoreWords=None):
    """Return a minimal index for ``document``.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
            Defaults to ``None`` instead of a shared mutable list.

    Returns:
        An empty list when ``document`` is empty (or otherwise falsy);
        otherwise a one-element list holding ``document`` unchanged.
    """
    if not document:
        return []
    return [document]

View File

@@ -0,0 +1,8 @@
from kwic import kwic
# Fixtures: an empty document and a short two-line document.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []

View File

@@ -0,0 +1,7 @@
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into lines, wrapped in a single-element list.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: Accepted for interface compatibility; not used yet.

    Returns:
        ``[]`` for an empty document, otherwise ``[lines]`` where ``lines`` is
        the result of ``document.splitlines()``.
    """
    return [document.splitlines()] if document else []

View File

@@ -0,0 +1,12 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # The first element holds one entry per line: two lines here...
    assert len(kwic(design_words_doc)[0]) == 2
    # ...and four lines here.
    assert len(kwic(goodbye_buddy_doc)[0]) == 4

View File

@@ -0,0 +1,10 @@
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into sentences, wrapped in a single-element list.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentences are separated at ``"."``
            (the periods themselves are dropped by ``str.split``); otherwise
            sentences are the lines of the document.

    Returns:
        ``[]`` for an empty document, otherwise ``[sentences]``.
    """
    if not document:
        return []
    sentences = document.split(".") if periodsToBreaks else document.splitlines()
    return [sentences]

View File

@@ -0,0 +1,25 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)[0]) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)[0]) == 4
    # By default, period-separated text on one line stays a single entry.
    assert len(kwic(hello_buddy_periods)[0]) == 1
    # With periodsToBreaks the same text is cut at its periods into four entries.
    assert len(kwic(hello_buddy_periods, periodsToBreaks=True)[0]) == 4

View File

@@ -0,0 +1,29 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into a flat list of sentences.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        ``[]`` for an empty document, otherwise the list of sentences.
    """
    if not document:
        return []
    if not periodsToBreaks:
        return document.splitlines()
    return split_by_periods(document)

View File

@@ -0,0 +1,29 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Expected sentences when hello_buddy_periods is split at its periods.
hello_buddy_periods_output = [
    "Hello there.",
    " Hello there, buddy.",
    " Hello and goodbye, buddy.",
    " Hello is like buddy Goodbye!",
]

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)) == 4
    # By default, a single line containing periods is returned untouched.
    assert kwic(hello_buddy_periods)[0] == hello_buddy_periods
    # With periodsToBreaks the text is cut into four sentences, and each
    # sentence keeps the period that ends it.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_periods_output

View File

@@ -0,0 +1,44 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into sentences, then each sentence into words.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        ``[]`` for an empty document, otherwise whatever
        ``split_by_word_as_tuples`` returns for the sentences.
    """
    if not document:
        return []
    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    return split_by_word_as_tuples(sentences)

View File

@@ -0,0 +1,33 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Expected result when the whole text is treated as one line-delimited sentence.
hello_buddy_periods_output = [(["Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and", "goodbye,", "buddy.",
                                "Hello", "is", "like", "buddy", "Goodbye!"], 0)]
# Expected result when the text is split into sentences at its periods.
hello_buddy_word_tuples_output = [(["Hello", "there."], 0),
                                  (["Hello", "there,", "buddy."], 1),
                                  (["Hello", "and", "goodbye,", "buddy."], 2),
                                  (["Hello", "is", "like", "buddy", "Goodbye!"], 3)]

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)) == 4
    # The output must match the expected word tuples exactly. A plain any()
    # membership test would pass as soon as a single entry matched.
    assert kwic(hello_buddy_periods) == hello_buddy_periods_output
    # Same exact comparison when sentences are split at periods.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_word_tuples_output

View File

@@ -0,0 +1,61 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def array_circular_shift(input_array, rotate_val):
    """Return a copy of ``input_array`` rotated left by ``rotate_val`` places.

    The shift is reduced modulo the sequence length, so amounts larger than
    the length wrap around instead of returning the input unrotated.
    Negative amounts rotate to the right.

    Args:
        input_array: A sliceable sequence (for example a list of words).
        rotate_val: Number of positions to rotate by.

    Returns:
        A new sequence of the same type; ``input_array`` is not modified.
    """
    length = len(input_array)
    # Guard the modulo against an empty sequence (division by zero).
    offset = rotate_val % length if length else 0
    return input_array[offset:] + input_array[:offset]
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Args:
        sentence_array: Iterable of entries whose first item is a word list
            and whose second item is the sentence index.

    Returns:
        A list of ``(shifted_words, index)`` tuples: for each entry, one
        rotation per word (starting with the unshifted order), each paired
        with that entry's index. Entries with no words contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _word in enumerate(entry[0])
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circularly shifted index for ``document``.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        A two-item tuple. The first item is the list of shifted sentences
        (empty for an empty document); the second item is always an empty
        list.
    """
    if not document:
        return [], []
    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    shifted = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return shifted, []

View File

@@ -0,0 +1,55 @@
import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Every rotation of each sentence, paired with the index of its sentence.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # An empty document yields a pair of empty lists.
    assert kwic.kwic(empty_document) == ([], [])
    # A document with text must not yield the empty pair.
    assert kwic.kwic(design_words_doc) != ([], [])
    # Two sentences of three words each give six rotations.
    assert len(kwic.kwic(design_words_doc)[0]) == 6
    # Sentences of 2, 3, 4 and 5 words give fourteen rotations.
    assert len(kwic.kwic(goodbye_buddy_doc)[0]) == 14
    # Helper check: splitting happens at periods, and newlines are removed.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.", " Not newlines."]
    # Helper check: sentences are broken into words and tagged with their index.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    # Helper check: rotating by zero leaves the order alone...
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five"]
    # ...and rotating by two moves the first two items to the end.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two"]
    # Helper check: every sentence is expanded into all of its rotations.
    assert kwic.fill_with_circular_shifts_and_original(
        [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    ) == this_is_split_periods_circular_output
    # End-to-end sanity check: the rotations are right and the output is
    # shaped as (rotations, []).
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == (this_is_split_periods_circular_output, [])

View File

@@ -0,0 +1,64 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def array_circular_shift(input_array, rotate_val):
    """Return a copy of ``input_array`` rotated left by ``rotate_val`` places.

    The shift is reduced modulo the sequence length, so amounts larger than
    the length wrap around instead of returning the input unrotated.
    Negative amounts rotate to the right.

    Args:
        input_array: A sliceable sequence (for example a list of words).
        rotate_val: Number of positions to rotate by.

    Returns:
        A new sequence of the same type; ``input_array`` is not modified.
    """
    length = len(input_array)
    # Guard the modulo against an empty sequence (division by zero).
    offset = rotate_val % length if length else 0
    return input_array[offset:] + input_array[:offset]
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Args:
        sentence_array: Iterable of entries whose first item is a word list
            and whose second item is the sentence index.

    Returns:
        A list of ``(shifted_words, index)`` tuples: for each entry, one
        rotation per word (starting with the unshifted order), each paired
        with that entry's index. Entries with no words contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _word in enumerate(entry[0])
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circularly shifted index for ``document``.

    Args:
        document: Text to index.
        listPairs: When true, the result is a ``(shifts, pairs)`` tuple;
            ``pairs`` is always an empty list in this version.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise the document is split on
            ``"\\n"``.

    Returns:
        The list of shifted sentences, or ``(shifts, [])`` when ``listPairs``
        is true. An empty document gives ``[]`` or ``([], [])`` respectively,
        so the return shape depends only on ``listPairs``.
    """
    if not document:
        shifted_lines = []
    else:
        sentences = split_by_periods(document) if periodsToBreaks else document.split('\n')
        shifted_lines = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return (shifted_lines, []) if listPairs else shifted_lines

View File

@@ -0,0 +1,55 @@
import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Every rotation of each sentence, paired with the index of its sentence.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # An empty document yields an empty list.
    assert kwic.kwic(empty_document) == []
    # A document with text must not yield an empty list.
    assert kwic.kwic(design_words_doc) != []
    # Two sentences of three words each give six rotations.
    assert len(kwic.kwic(design_words_doc)) == 6
    # Sentences of 2, 3, 4 and 5 words give fourteen rotations.
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14
    # Helper check: splitting happens at periods, and newlines are removed.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.", " Not newlines."]
    # Helper check: sentences are broken into words and tagged with their index.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    # Helper check: rotating by zero leaves the order alone...
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five"]
    # ...and rotating by two moves the first two items to the end.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two"]
    # Helper check: every sentence is expanded into all of its rotations.
    assert kwic.fill_with_circular_shifts_and_original(
        [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    ) == this_is_split_periods_circular_output
    # End-to-end sanity check: the rotations are right and the output is a
    # plain list of (words, index) tuples.
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == this_is_split_periods_circular_output

View File

@@ -0,0 +1,64 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def array_circular_shift(input_array, rotate_val):
    """Return a copy of ``input_array`` rotated left by ``rotate_val`` places.

    The shift is reduced modulo the sequence length, so amounts larger than
    the length wrap around instead of returning the input unrotated.
    Negative amounts rotate to the right.

    Args:
        input_array: A sliceable sequence (for example a list of words).
        rotate_val: Number of positions to rotate by.

    Returns:
        A new sequence of the same type; ``input_array`` is not modified.
    """
    length = len(input_array)
    # Guard the modulo against an empty sequence (division by zero).
    offset = rotate_val % length if length else 0
    return input_array[offset:] + input_array[:offset]
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Args:
        sentence_array: Iterable of entries whose first item is a word list
            and whose second item is the sentence index.

    Returns:
        A list of ``(shifted_words, index)`` tuples: for each entry, one
        rotation per word (starting with the unshifted order), each paired
        with that entry's index. Entries with no words contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _word in enumerate(entry[0])
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circularly shifted index for ``document``.

    Args:
        document: Text to index.
        listPairs: When true, the result is a ``(shifts, pairs)`` tuple;
            ``pairs`` is always an empty list in this version.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        The list of shifted sentences, or ``(shifts, [])`` when ``listPairs``
        is true. An empty document gives ``[]`` or ``([], [])`` respectively,
        so the return shape depends only on ``listPairs``.
    """
    if not document:
        shifted_lines = []
    else:
        sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
        shifted_lines = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return (shifted_lines, []) if listPairs else shifted_lines

View File

@@ -0,0 +1,31 @@
1. How did you decide what to test first? Would your final code change significantly if you changed the order of tests?
The process for deciding what to test mostly had to do with a logical breakdown of the core parts of the code so that
things that relied on others to work were done after those pre-requisites. So, for example, before we could do anything
first the document had to be broken down into sentences, then words. In that part, you could also change whether or not
the sentence breaks were handled with newlines or periods. Afterwards, you could then deal with rearranging the words as
necessary, determining pairs, and then excluding any words that were given as arguments. I don't feel like the
code would have been massively different if it'd been done in reverse, but it definitely would have been more difficult
to write. On top of that, you'd still have to think all the way through how the code would need to function, regardless
of the order in which you wrote it, in order to know what needed to be written in the first place. Also, if you
literally reversed the order of the tests, it could lead to you skipping the whole point of using tests (if you did it
incorrectly). Again, for an example, if you wrote a test that produced the final output of the program as the initial
input and expected a correct output, you'd either have to hard code many many values in to the resulting code while you
implemented them for real, or you'd end up writing all necessary code at once, which would defeat the purpose.
2. What did you think of test driven development, for this problem? What are the strengths and weaknesses of the
approach? Does it encourage/discourage certain kinds of program designs?
I'm torn about it. I feel that for this particular problem, it may have been overkill and actually hampered design
progress at times. The main issue was that for each major step, the output of the code often changed enough that many
of the tests had to be modified or rewritten to match. The definite strengths of this approach are that you can easily
tell when you change something that breaks the functionality of your code, and it will immediately produce an exception.
On the flip side, it does take much longer to write functional code, depending on what exactly you're writing. I would
argue that the larger the application being written, the more important it would be to do this kind of testing.
Without it, you might be digging through hundreds of thousands to millions of lines of code to figure out what is
breaking. I can definitely appreciate the fact that taking the time to write the tests helps guarantee that it's doing
what you want, but I feel like it would be more useful in other programming contexts, like the one just mentioned.
I'd say this style of coding encourages program designs that have a consistent output even if the underlying code
changes often. For example, I feel like it'd be ideal for developing an API where once a standard has been created,
that API should function identically for a long time, even if the backend is changing constantly. In general, this way
of programming also encourages full coverage of edge cases, as you'll easily and quickly be able to test for them.

View File

@@ -0,0 +1,2 @@
def kwic(document, listPairs=False, ignoreWords=None):
    """Return the keyword-in-context index for ``document``.

    Placeholder implementation: every input produces an empty index.

    Args:
        document: Text to index; not inspected yet.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
            Defaults to ``None`` instead of a shared mutable list.

    Returns:
        An empty list.
    """
    return []

View File

@@ -0,0 +1,5 @@
def kwic(document, listPairs=False, ignoreWords=None):
    """Return a minimal index for ``document``.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
            Defaults to ``None`` instead of a shared mutable list.

    Returns:
        An empty list when ``document`` is empty (or otherwise falsy);
        otherwise a one-element list holding ``document`` unchanged.
    """
    if not document:
        return []
    return [document]

View File

@@ -0,0 +1,7 @@
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into lines, wrapped in a single-element list.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: Accepted for interface compatibility; not used yet.

    Returns:
        ``[]`` for an empty document, otherwise ``[lines]`` where ``lines`` is
        the result of ``document.splitlines()``.
    """
    return [document.splitlines()] if document else []

View File

@@ -0,0 +1,10 @@
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into sentences, wrapped in a single-element list.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentences are separated at ``"."``
            (the periods themselves are dropped by ``str.split``); otherwise
            sentences are the lines of the document.

    Returns:
        ``[]`` for an empty document, otherwise ``[sentences]``.
    """
    if not document:
        return []
    sentences = document.split(".") if periodsToBreaks else document.splitlines()
    return [sentences]

View File

@@ -0,0 +1,29 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into a flat list of sentences.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        ``[]`` for an empty document, otherwise the list of sentences.
    """
    if not document:
        return []
    if not periodsToBreaks:
        return document.splitlines()
    return split_by_periods(document)

View File

@@ -0,0 +1,44 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split ``document`` into sentences, then each sentence into words.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        ``[]`` for an empty document, otherwise whatever
        ``split_by_word_as_tuples`` returns for the sentences.
    """
    if not document:
        return []
    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    return split_by_word_as_tuples(sentences)

View File

@@ -0,0 +1,61 @@
def split_by_periods(document):
    """Split ``document`` into sentences that each end at a period.

    Newline characters are discarded before splitting, so a sentence may span
    several input lines. Each period stays attached to the sentence it ends,
    and any text after the final period is returned as a last, unterminated
    sentence.

    Args:
        document: The text to split, as a string.

    Returns:
        A list of sentence strings; empty when ``document`` is empty.
    """
    # Work on whole substrings rather than appending one character at a time.
    pieces = document.replace("\n", "").split(".")
    # Every piece except the last was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])
    return sentences
def split_by_word_as_tuples(sentence_array):
    """Break each sentence into words and tag it with its position.

    Words are separated on single spaces; the empty strings produced by
    leading, trailing, or repeated spaces are dropped.

    Args:
        sentence_array: Iterable of sentence strings.

    Returns:
        A list of ``(words, index)`` tuples. ``words`` is a real list (a lazy
        ``filter`` object could be neither sliced nor compared with a list on
        Python 3) and ``index`` is the sentence's zero-based position in
        ``sentence_array``.
    """
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
def array_circular_shift(input_array, rotate_val):
    """Return a copy of ``input_array`` rotated left by ``rotate_val`` places.

    The shift is reduced modulo the sequence length, so amounts larger than
    the length wrap around instead of returning the input unrotated.
    Negative amounts rotate to the right.

    Args:
        input_array: A sliceable sequence (for example a list of words).
        rotate_val: Number of positions to rotate by.

    Returns:
        A new sequence of the same type; ``input_array`` is not modified.
    """
    length = len(input_array)
    # Guard the modulo against an empty sequence (division by zero).
    offset = rotate_val % length if length else 0
    return input_array[offset:] + input_array[:offset]
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every sentence into all of its circular shifts.

    Args:
        sentence_array: Iterable of entries whose first item is a word list
            and whose second item is the sentence index.

    Returns:
        A list of ``(shifted_words, index)`` tuples: for each entry, one
        rotation per word (starting with the unshifted order), each paired
        with that entry's index. Entries with no words contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _word in enumerate(entry[0])
    ]
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circularly shifted index for ``document``.

    Args:
        document: Text to index.
        listPairs: Accepted for interface compatibility; not used yet.
        ignoreWords: Accepted for interface compatibility; not used yet.
        periodsToBreaks: When true, sentence boundaries come from
            ``split_by_periods``; otherwise each line is a sentence.

    Returns:
        A two-item tuple. The first item is the list of shifted sentences
        (empty for an empty document); the second item is always an empty
        list.
    """
    if not document:
        return [], []
    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    shifted = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return shifted, []

View File

@@ -0,0 +1,55 @@
import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Every rotation of each sentence, paired with the index of its sentence.
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # An empty document yields an empty list.
    assert kwic.kwic(empty_document) == []
    # A document with text must not yield an empty list.
    assert kwic.kwic(design_words_doc) != []
    # Two sentences of three words each give six rotations.
    assert len(kwic.kwic(design_words_doc)) == 6
    # Sentences of 2, 3, 4 and 5 words give fourteen rotations.
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14
    # Helper check: splitting happens at periods, and newlines are removed.
    assert kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") == [
        "This is something split by periods.", " Not newlines."]
    # Helper check: sentences are broken into words and tagged with their index.
    assert kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) == [
        (['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    # Helper check: rotating by zero leaves the order alone...
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) == [
        "One", "Two", "Three", "Four", "Five"]
    # ...and rotating by two moves the first two items to the end.
    assert kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) == [
        "Three", "Four", "Five", "One", "Two"]
    # Helper check: every sentence is expanded into all of its rotations.
    assert kwic.fill_with_circular_shifts_and_original(
        [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)]
    ) == this_is_split_periods_circular_output
    # End-to-end sanity check: the rotations are right and the output is a
    # plain list of (words, index) tuples.
    assert kwic.kwic(
        "This is something split \nby periods. Not \n newlines.", periodsToBreaks=True
    ) == this_is_split_periods_circular_output

View File

@@ -0,0 +1,6 @@
from kwic import kwic
# Fixture: a document that contains no text at all.
document = ""

if __name__ == "__main__":
    # With nothing to index, kwic must hand back an empty list.
    assert kwic(document) == []

View File

@@ -0,0 +1,8 @@
from kwic import kwic
# Fixtures: an empty document and a short two-line document.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []

View File

@@ -0,0 +1,12 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # The first element holds one entry per line: two lines here...
    assert len(kwic(design_words_doc)[0]) == 2
    # ...and four lines here.
    assert len(kwic(goodbye_buddy_doc)[0]) == 4

View File

@@ -0,0 +1,25 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)[0]) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)[0]) == 4
    # By default, period-separated text on one line stays a single entry.
    assert len(kwic(hello_buddy_periods)[0]) == 1
    # With periodsToBreaks the same text is cut at its periods into four entries.
    assert len(kwic(hello_buddy_periods, periodsToBreaks=True)[0]) == 4

View File

@@ -0,0 +1,29 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Expected sentences when hello_buddy_periods is split at its periods.
hello_buddy_periods_output = [
    "Hello there.",
    " Hello there, buddy.",
    " Hello and goodbye, buddy.",
    " Hello is like buddy Goodbye!",
]

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)) == 4
    # By default, a single line containing periods is returned untouched.
    assert kwic(hello_buddy_periods)[0] == hello_buddy_periods
    # With periodsToBreaks the text is cut into four sentences, and each
    # sentence keeps the period that ends it.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_periods_output

View File

@@ -0,0 +1,33 @@
from kwic import kwic
# Fixture documents used by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
# Expected result when the whole text is treated as one line-delimited sentence.
hello_buddy_periods_output = [(["Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and", "goodbye,", "buddy.",
                                "Hello", "is", "like", "buddy", "Goodbye!"], 0)]
# Expected result when the text is split into sentences at its periods.
hello_buddy_word_tuples_output = [(["Hello", "there."], 0),
                                  (["Hello", "there,", "buddy."], 1),
                                  (["Hello", "and", "goodbye,", "buddy."], 2),
                                  (["Hello", "is", "like", "buddy", "Goodbye!"], 3)]

if __name__ == "__main__":
    # An empty document yields an empty result.
    assert kwic(empty_document) == []
    # A document with text must not yield an empty result.
    assert kwic(design_words_doc) != []
    # Two newline-separated sentences come back as two entries.
    assert len(kwic(design_words_doc)) == 2
    # Four newline-separated sentences come back as four entries.
    assert len(kwic(goodbye_buddy_doc)) == 4
    # The output must match the expected word tuples exactly. A plain any()
    # membership test would pass as soon as a single entry matched.
    assert kwic(hello_buddy_periods) == hello_buddy_periods_output
    # Same exact comparison when sentences are split at periods.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_word_tuples_output

View File

@@ -0,0 +1,55 @@
import kwic
# Fixture documents handed to the kwic module by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every rotation of both sentences, each paired with its sentence number.
this_is_split_periods_circular_output = [
    (["This", "is", "something", "split", "by", "periods."], 0),
    (["is", "something", "split", "by", "periods.", "This"], 0),
    (["something", "split", "by", "periods.", "This", "is"], 0),
    (["split", "by", "periods.", "This", "is", "something"], 0),
    (["by", "periods.", "This", "is", "something", "split"], 0),
    (["periods.", "This", "is", "something", "split", "by"], 0),
    (["Not", "newlines."], 1),
    (["newlines.", "Not"], 1),
]

if __name__ == "__main__":
    # In this version kwic.kwic() is compared against a 2-tuple of lists.
    # No input -> both lists empty.
    assert kwic.kwic(empty_document) == ([], [])

    # Real input must not give the empty pair.
    assert kwic.kwic(design_words_doc) != ([], [])

    # Two three-word lines -> six entries in the first list (one per word).
    assert len(kwic.kwic(design_words_doc)[0]) == 6

    # Lines of 2, 3, 4 and 5 words -> fourteen entries in the first list.
    assert len(kwic.kwic(goodbye_buddy_doc)[0]) == 14

    # Helper-level check: splitting on periods drops the embedded newlines.
    sentences = kwic.split_by_periods("This is something split \nby periods. Not \n newlines.")
    assert sentences == ["This is something split by periods.", " Not newlines."]

    # Helper-level check: each sentence becomes (word list, sentence number).
    word_tuples = kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."])
    assert word_tuples == [
        (["This", "is", "something", "split", "by", "periods."], 0),
        (["Not", "newlines."], 1),
    ]

    # Helper-level check: a shift of 0 is the identity...
    unshifted = kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0)
    assert unshifted == ["One", "Two", "Three", "Four", "Five"]

    # ...and a shift of 2 moves the first two items to the end.
    shifted_by_two = kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2)
    assert shifted_by_two == ["Three", "Four", "Five", "One", "Two"]

    # Helper-level check: every sentence is expanded into all of its rotations.
    all_shifts = kwic.fill_with_circular_shifts_and_original(
        [
            (["This", "is", "something", "split", "by", "periods."], 0),
            (["Not", "newlines."], 1),
        ]
    )
    assert all_shifts == this_is_split_periods_circular_output

    # End-to-end sanity check: period splitting plus rotation, returned in the
    # (entries, second list) shape with the second list empty.
    end_to_end = kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True)
    assert end_to_end == (this_is_split_periods_circular_output, [])

View File

@@ -0,0 +1,55 @@
import kwic
# Fixture documents handed to the kwic module by the checks below.
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every rotation of both sentences, each paired with its sentence number.
this_is_split_periods_circular_output = [
    (["This", "is", "something", "split", "by", "periods."], 0),
    (["is", "something", "split", "by", "periods.", "This"], 0),
    (["something", "split", "by", "periods.", "This", "is"], 0),
    (["split", "by", "periods.", "This", "is", "something"], 0),
    (["by", "periods.", "This", "is", "something", "split"], 0),
    (["periods.", "This", "is", "something", "split", "by"], 0),
    (["Not", "newlines."], 1),
    (["newlines.", "Not"], 1),
]

if __name__ == "__main__":
    # In this version kwic.kwic() is compared against a plain list.
    # No input -> empty list.
    assert kwic.kwic(empty_document) == []

    # Real input must not give an empty list.
    assert kwic.kwic(design_words_doc) != []

    # Two three-word lines -> six entries (one per word).
    assert len(kwic.kwic(design_words_doc)) == 6

    # Lines of 2, 3, 4 and 5 words -> fourteen entries.
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Helper-level check: splitting on periods drops the embedded newlines.
    sentences = kwic.split_by_periods("This is something split \nby periods. Not \n newlines.")
    assert sentences == ["This is something split by periods.", " Not newlines."]

    # Helper-level check: each sentence becomes (word list, sentence number).
    word_tuples = kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."])
    assert word_tuples == [
        (["This", "is", "something", "split", "by", "periods."], 0),
        (["Not", "newlines."], 1),
    ]

    # Helper-level check: a shift of 0 is the identity...
    unshifted = kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0)
    assert unshifted == ["One", "Two", "Three", "Four", "Five"]

    # ...and a shift of 2 moves the first two items to the end.
    shifted_by_two = kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2)
    assert shifted_by_two == ["Three", "Four", "Five", "One", "Two"]

    # Helper-level check: every sentence is expanded into all of its rotations.
    all_shifts = kwic.fill_with_circular_shifts_and_original(
        [
            (["This", "is", "something", "split", "by", "periods."], 0),
            (["Not", "newlines."], 1),
        ]
    )
    assert all_shifts == this_is_split_periods_circular_output

    # End-to-end sanity check: period splitting plus rotation in one call.
    end_to_end = kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True)
    assert end_to_end == this_is_split_periods_circular_output

View File

@@ -0,0 +1,6 @@
1) ignoreWords only applies to indexing (the shifts) -- it has no impact on listPairs.
2) In listPairs, a "pair" is two (different) words that appear together in a "line" -- they may not be next to each other, they are just in the same line.
3) The list of pairs should have structure of list of tuple of tuple, I'll fix the bad example in the assignment. Not lists.
4) Submit files as a flat folder structure!!! kwic0.py testkwic0.py
5) Use assert to test. It must throw an uncaught exception.
6) The test testkwicN should pass kwicN, but not if you ran it against a newer

View File

@@ -0,0 +1,10 @@
from mykwic import *
from pprint import pprint
# Replay every kwic() call listed in tocheck.txt (one argument list per line)
# and pretty-print each result under a separator and an INPUT/OUTPUT header.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
with open("tocheck.txt") as cases:  # closed automatically, even on error
    for line in cases:
        # Strip only the line terminator; slicing off the last character
        # would corrupt a final line that has no trailing newline.
        call_args = line.rstrip("\r\n")
        if not call_args.strip():
            # A blank line is not a test case; kwic() needs a document.
            continue
        print("=" * 50)
        print("INPUT: " + call_args)
        # NOTE(review): eval() executes whatever tocheck.txt contains. That is
        # acceptable for a local, hand-written case file, but this script must
        # never be pointed at untrusted input.
        v = eval("kwic(" + call_args + ")")
        print("OUTPUT:")
        # The original had a bare `pprint` here, which printed nothing.
        pprint(v)

View File

@@ -0,0 +1,326 @@
==================================================
INPUT: "Design is hard.\nLet's just implement."
OUTPUT:
[(['Design', 'is', 'hard.'], 0),
(['hard.', 'Design', 'is'], 0),
(['implement.', "Let's", 'just'], 1),
(['is', 'hard.', 'Design'], 0),
(['just', 'implement.', "Let's"], 1),
(["Let's", 'just', 'implement.'], 1)]
==================================================
INPUT: "Design is hard.\nLet's just implement.", ignoreWords=["is"]
OUTPUT:
[(['Design', 'is', 'hard.'], 0),
(['hard.', 'Design', 'is'], 0),
(['implement.', "Let's", 'just'], 1),
(['just', 'implement.', "Let's"], 1),
(["Let's", 'just', 'implement.'], 1)]
==================================================
INPUT: "Design is hard.\nLet's just implement.", ignoreWords=["is"], listPairs=True
OUTPUT:
([(['Design', 'is', 'hard.'], 0),
(['hard.', 'Design', 'is'], 0),
(['implement.', "Let's", 'just'], 1),
(['just', 'implement.', "Let's"], 1),
(["Let's", 'just', 'implement.'], 1)],
[])
==================================================
INPUT: "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!", listPairs=True
OUTPUT:
([(['and', 'goodbye,', 'buddy.', 'Hello'], 2),
(['buddy', 'Goodbye!', 'Hello', 'is', 'like'], 3),
(['buddy.', 'Hello', 'and', 'goodbye,'], 2),
(['buddy.', 'Hello', 'there,'], 1),
(['Goodbye!', 'Hello', 'is', 'like', 'buddy'], 3),
(['goodbye,', 'buddy.', 'Hello', 'and'], 2),
(['Hello', 'and', 'goodbye,', 'buddy.'], 2),
(['Hello', 'is', 'like', 'buddy', 'Goodbye!'], 3),
(['Hello', 'there,', 'buddy.'], 1),
(['Hello', 'there.'], 0),
(['is', 'like', 'buddy', 'Goodbye!', 'Hello'], 3),
(['like', 'buddy', 'Goodbye!', 'Hello', 'is'], 3),
(['there,', 'buddy.', 'Hello'], 1),
(['there.', 'Hello'], 0)],
[(('buddy', 'goodbye'), 2),
(('buddy', 'hello'), 3),
(('goodbye', 'hello'), 2),
(('hello', 'there'), 2)])
==================================================
INPUT: "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!", listPairs=True, periodsToBreaks=True
OUTPUT:
([(['and', 'goodbye,', 'buddy.', 'Hello'], 2),
(['buddy', 'Goodbye!', 'Hello', 'is', 'like'], 3),
(['buddy.', 'Hello', 'and', 'goodbye,'], 2),
(['buddy.', 'Hello', 'there,'], 1),
(['Goodbye!', 'Hello', 'is', 'like', 'buddy'], 3),
(['goodbye,', 'buddy.', 'Hello', 'and'], 2),
(['Hello', 'and', 'goodbye,', 'buddy.'], 2),
(['Hello', 'is', 'like', 'buddy', 'Goodbye!'], 3),
(['Hello', 'there,', 'buddy.'], 1),
(['Hello', 'there.'], 0),
(['is', 'like', 'buddy', 'Goodbye!', 'Hello'], 3),
(['like', 'buddy', 'Goodbye!', 'Hello', 'is'], 3),
(['there,', 'buddy.', 'Hello'], 1),
(['there.', 'Hello'], 0)],
[(('buddy', 'goodbye'), 2),
(('buddy', 'hello'), 3),
(('goodbye', 'hello'), 2),
(('hello', 'there'), 2)])
==================================================
INPUT: ". . a"
OUTPUT:
[(['.', '.', 'a'], 0), (['.', 'a', '.'], 0), (['a', '.', '.'], 0)]
==================================================
INPUT: ". . a", periodsToBreaks=True
OUTPUT:
[(['.', '.', 'a'], 0), (['.', 'a', '.'], 0), (['a', '.', '.'], 0)]
==================================================
INPUT: ". A B\n. A B C\n. A B C D", listPairs=True
OUTPUT:
([(['.', 'A', 'B'], 0),
(['.', 'A', 'B', 'C'], 1),
(['.', 'A', 'B', 'C', 'D'], 2),
(['A', 'B', '.'], 0),
(['A', 'B', 'C', '.'], 1),
(['A', 'B', 'C', 'D', '.'], 2),
(['B', '.', 'A'], 0),
(['B', 'C', '.', 'A'], 1),
(['B', 'C', 'D', '.', 'A'], 2),
(['C', '.', 'A', 'B'], 1),
(['C', 'D', '.', 'A', 'B'], 2),
(['D', '.', 'A', 'B', 'C'], 2)],
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
==================================================
INPUT: "Hello world. This is a test\nhopefully it turns out okay", periodsToBreaks = True
OUTPUT:
[(['a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is'], 1),
(['Hello', 'world.'], 0),
(['hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is', 'a', 'test'], 1),
(['is', 'a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This'], 1),
(['it', 'turns', 'out', 'okay', 'This', 'is', 'a', 'test', 'hopefully'], 1),
(['okay', 'This', 'is', 'a', 'test', 'hopefully', 'it', 'turns', 'out'], 1),
(['out', 'okay', 'This', 'is', 'a', 'test', 'hopefully', 'it', 'turns'], 1),
(['test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is', 'a'], 1),
(['This', 'is', 'a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay'], 1),
(['turns', 'out', 'okay', 'This', 'is', 'a', 'test', 'hopefully', 'it'], 1),
(['world.', 'Hello'], 0)]
==================================================
INPUT: "It's very nice to be footloose. \nWith just a toothbrush and a comb.\n"
OUTPUT:
[(['a', 'comb.', 'With', 'just', 'a', 'toothbrush', 'and'], 1),
(['a', 'toothbrush', 'and', 'a', 'comb.', 'With', 'just'], 1),
(['and', 'a', 'comb.', 'With', 'just', 'a', 'toothbrush'], 1),
(['be', 'footloose.', "It's", 'very', 'nice', 'to'], 0),
(['comb.', 'With', 'just', 'a', 'toothbrush', 'and', 'a'], 1),
(['footloose.', "It's", 'very', 'nice', 'to', 'be'], 0),
(["It's", 'very', 'nice', 'to', 'be', 'footloose.'], 0),
(['just', 'a', 'toothbrush', 'and', 'a', 'comb.', 'With'], 1),
(['nice', 'to', 'be', 'footloose.', "It's", 'very'], 0),
(['to', 'be', 'footloose.', "It's", 'very', 'nice'], 0),
(['toothbrush', 'and', 'a', 'comb.', 'With', 'just', 'a'], 1),
(['very', 'nice', 'to', 'be', 'footloose.', "It's"], 0),
(['With', 'just', 'a', 'toothbrush', 'and', 'a', 'comb.'], 1)]
==================================================
INPUT: "It's very nice to be footloose. \nWith just a toothbrush and a comb.\n", periodsToBreaks=True
OUTPUT:
[(['a', 'comb.', 'With', 'just', 'a', 'toothbrush', 'and'], 1),
(['a', 'toothbrush', 'and', 'a', 'comb.', 'With', 'just'], 1),
(['and', 'a', 'comb.', 'With', 'just', 'a', 'toothbrush'], 1),
(['be', 'footloose.', "It's", 'very', 'nice', 'to'], 0),
(['comb.', 'With', 'just', 'a', 'toothbrush', 'and', 'a'], 1),
(['footloose.', "It's", 'very', 'nice', 'to', 'be'], 0),
(["It's", 'very', 'nice', 'to', 'be', 'footloose.'], 0),
(['just', 'a', 'toothbrush', 'and', 'a', 'comb.', 'With'], 1),
(['nice', 'to', 'be', 'footloose.', "It's", 'very'], 0),
(['to', 'be', 'footloose.', "It's", 'very', 'nice'], 0),
(['toothbrush', 'and', 'a', 'comb.', 'With', 'just', 'a'], 1),
(['very', 'nice', 'to', 'be', 'footloose.', "It's"], 0),
(['With', 'just', 'a', 'toothbrush', 'and', 'a', 'comb.'], 1)]
==================================================
INPUT: "hello here, hello there, hello everywhere",listPairs = True
OUTPUT:
([(['everywhere', 'hello', 'here,', 'hello', 'there,', 'hello'], 0),
(['hello', 'everywhere', 'hello', 'here,', 'hello', 'there,'], 0),
(['hello', 'here,', 'hello', 'there,', 'hello', 'everywhere'], 0),
(['hello', 'there,', 'hello', 'everywhere', 'hello', 'here,'], 0),
(['here,', 'hello', 'there,', 'hello', 'everywhere', 'hello'], 0),
(['there,', 'hello', 'everywhere', 'hello', 'here,', 'hello'], 0)],
[])
==================================================
INPUT: "hello here\nhello here again\nhello again", listPairs=True
OUTPUT:
([(['again', 'hello'], 2),
(['again', 'hello', 'here'], 1),
(['hello', 'again'], 2),
(['hello', 'here'], 0),
(['hello', 'here', 'again'], 1),
(['here', 'again', 'hello'], 1),
(['here', 'hello'], 0)],
[(('again', 'hello'), 2), (('hello', 'here'), 2)])
==================================================
INPUT: "hello hello hello\nhello hello", listPairs=True
OUTPUT:
([(['hello', 'hello'], 1),
(['hello', 'hello'], 1),
(['hello', 'hello', 'hello'], 0),
(['hello', 'hello', 'hello'], 0),
(['hello', 'hello', 'hello'], 0)],
[])
==================================================
INPUT: "to be or not to be", listPairs=True
OUTPUT:
([(['be', 'or', 'not', 'to', 'be', 'to'], 0),
(['be', 'to', 'be', 'or', 'not', 'to'], 0),
(['not', 'to', 'be', 'to', 'be', 'or'], 0),
(['or', 'not', 'to', 'be', 'to', 'be'], 0),
(['to', 'be', 'or', 'not', 'to', 'be'], 0),
(['to', 'be', 'to', 'be', 'or', 'not'], 0)],
[])
==================================================
INPUT: ". A B\n. A B C\n. A B C D", listPairs=True
OUTPUT:
([(['.', 'A', 'B'], 0),
(['.', 'A', 'B', 'C'], 1),
(['.', 'A', 'B', 'C', 'D'], 2),
(['A', 'B', '.'], 0),
(['A', 'B', 'C', '.'], 1),
(['A', 'B', 'C', 'D', '.'], 2),
(['B', '.', 'A'], 0),
(['B', 'C', '.', 'A'], 1),
(['B', 'C', 'D', '.', 'A'], 2),
(['C', '.', 'A', 'B'], 1),
(['C', 'D', '.', 'A', 'B'], 2),
(['D', '.', 'A', 'B', 'C'], 2)],
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
==================================================
INPUT: "a bad\ncat barks."
OUTPUT:
[(['a', 'bad'], 0),
(['bad', 'a'], 0),
(['barks.', 'cat'], 1),
(['cat', 'barks.'], 1)]
==================================================
INPUT: "This is not a sentence.\nNeither is this.",ignoreWords=["is."]
OUTPUT:
[(['a', 'sentence.', 'This', 'is', 'not'], 0),
(['is', 'not', 'a', 'sentence.', 'This'], 0),
(['is', 'this.', 'Neither'], 1),
(['Neither', 'is', 'this.'], 1),
(['not', 'a', 'sentence.', 'This', 'is'], 0),
(['sentence.', 'This', 'is', 'not', 'a'], 0),
(['This', 'is', 'not', 'a', 'sentence.'], 0),
(['this.', 'Neither', 'is'], 1)]
==================================================
INPUT: "This is not a sentence.\nNeither is this.",ignoreWords=["is"]
OUTPUT:
[(['a', 'sentence.', 'This', 'is', 'not'], 0),
(['Neither', 'is', 'this.'], 1),
(['not', 'a', 'sentence.', 'This', 'is'], 0),
(['sentence.', 'This', 'is', 'not', 'a'], 0),
(['This', 'is', 'not', 'a', 'sentence.'], 0),
(['this.', 'Neither', 'is'], 1)]
==================================================
INPUT: "hello hello\nhello hello"
OUTPUT:
[(['hello', 'hello'], 0),
(['hello', 'hello'], 0),
(['hello', 'hello'], 1),
(['hello', 'hello'], 1)]
==================================================
INPUT: "#!good morning", ignoreWords = ['!good']
OUTPUT:
[(['#!good', 'morning'], 0), (['morning', '#!good'], 0)]
==================================================
INPUT: "go!od morning-!", ignoreWords = ['good']
OUTPUT:
[(['morning-!', 'go!od'], 0)]
==================================================
INPUT: "#!good morning-!", ignoreWords = ['!GoOd']
OUTPUT:
[(['#!good', 'morning-!'], 0), (['morning-!', '#!good'], 0)]
==================================================
INPUT: "?!good morning-!", ignoreWords = ['!GoOd']
OUTPUT:
[(['?!good', 'morning-!'], 0), (['morning-!', '?!good'], 0)]
==================================================
INPUT: "?!go!!!od morning-!", ignoreWords = ['!GoOd']
OUTPUT:
[(['?!go!!!od', 'morning-!'], 0), (['morning-!', '?!go!!!od'], 0)]
==================================================
INPUT: 'This pair? is good.\n So is this pair and that pair',listPairs=True
OUTPUT:
([(['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),
(['good.', 'This', 'pair?', 'is'], 0),
(['is', 'good.', 'This', 'pair?'], 0),
(['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),
(['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),
(['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),
(['pair?', 'is', 'good.', 'This'], 0),
(['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),
(['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),
(['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),
(['This', 'pair?', 'is', 'good.'], 0)],
[(('is', 'pair'), 2), (('is', 'this'), 2), (('pair', 'this'), 2)])
==================================================
INPUT: "CS is cool"
OUTPUT:
[(['cool', 'CS', 'is'], 0),
(['CS', 'is', 'cool'], 0),
(['is', 'cool', 'CS'], 0)]
==================================================
INPUT: "a b\na b c\na b c d", listPairs=True
OUTPUT:
([(['a', 'b'], 0),
(['a', 'b', 'c'], 1),
(['a', 'b', 'c', 'd'], 2),
(['b', 'a'], 0),
(['b', 'c', 'a'], 1),
(['b', 'c', 'd', 'a'], 2),
(['c', 'a', 'b'], 1),
(['c', 'd', 'a', 'b'], 2),
(['d', 'a', 'b', 'c'], 2)],
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
==================================================
INPUT: 'This pair? is good.\n So is this pair and that pair', listPairs=True
OUTPUT:
([(['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),
(['good.', 'This', 'pair?', 'is'], 0),
(['is', 'good.', 'This', 'pair?'], 0),
(['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),
(['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),
(['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),
(['pair?', 'is', 'good.', 'This'], 0),
(['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),
(['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),
(['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),
(['This', 'pair?', 'is', 'good.'], 0)],
[(('is', 'pair'), 2), (('is', 'this'), 2), (('pair', 'this'), 2)])

View File

@@ -0,0 +1,29 @@
"Design is hard.\nLet's just implement."
"Design is hard.\nLet's just implement.", ignoreWords=["is"]
"Design is hard.\nLet's just implement.", ignoreWords=["is"], listPairs=True
"Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!", listPairs=True
"Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!", listPairs=True, periodsToBreaks=True
". . a"
". . a", periodsToBreaks=True
". A B\n. A B C\n. A B C D", listPairs=True
"Hello world. This is a test\nhopefully it turns out okay", periodsToBreaks = True
"It's very nice to be footloose. \nWith just a toothbrush and a comb.\n"
"It's very nice to be footloose. \nWith just a toothbrush and a comb.\n", periodsToBreaks=True
"hello here, hello there, hello everywhere",listPairs = True
"hello here\nhello here again\nhello again", listPairs=True
"hello hello hello\nhello hello", listPairs=True
"to be or not to be", listPairs=True
". A B\n. A B C\n. A B C D", listPairs=True
"a bad\ncat barks."
"This is not a sentence.\nNeither is this.",ignoreWords=["is."]
"This is not a sentence.\nNeither is this.",ignoreWords=["is"]
"hello hello\nhello hello"
"#!good morning", ignoreWords = ['!good']
"go!od morning-!", ignoreWords = ['good']
"#!good morning-!", ignoreWords = ['!GoOd']
"?!good morning-!", ignoreWords = ['!GoOd']
"?!go!!!od morning-!", ignoreWords = ['!GoOd']
'This pair? is good.\n So is this pair and that pair',listPairs=True
"CS is cool"
"a b\na b c\na b c d", listPairs=True
'This pair? is good.\n So is this pair and that pair', listPairs=True