mirror of
https://github.com/caperren/school_archives.git
synced 2025-11-09 21:51:15 +00:00
Added work from my other class repositories before deletion
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Return the keyword-in-context index for *document*.

    This first iteration is a stub: every argument is accepted and ignored,
    and the result is always an empty list.

    ``ignoreWords`` defaults to ``None`` instead of ``[]`` so that no mutable
    list object is shared between calls.
    """
    return []
|
||||
@@ -0,0 +1,6 @@
|
||||
from kwic import kwic
|
||||
|
||||
document = ""  # Input no data

if __name__ == "__main__":
    # With no data to index, the result has to be empty.
    result = kwic(document)
    assert result == []
|
||||
@@ -0,0 +1,5 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Return *document* wrapped in a list, or ``[]`` when it is empty.

    ``listPairs`` and ``ignoreWords`` are accepted but not used yet.
    ``ignoreWords`` defaults to ``None`` instead of ``[]`` so that no mutable
    list object is shared between calls.
    """
    if not document:
        return []

    return [document]
|
||||
@@ -0,0 +1,8 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input must not produce empty output.
    assert kwic(design_words_doc) != []
|
||||
@@ -0,0 +1,7 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into lines and return them nested in a list.

    An empty document yields ``[]``; otherwise the result is a one-element
    list whose only item is the list of lines. ``listPairs``, ``ignoreWords``
    and ``periodsToBreaks`` are accepted but not used yet.
    """
    if document:
        lines = document.splitlines()
        return [lines]

    return []
|
||||
@@ -0,0 +1,12 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input must not produce empty output.
    assert kwic(design_words_doc) != []

    # The nested list holds one entry per input line: two lines here ...
    design_lines = kwic(design_words_doc)[0]
    assert len(design_lines) == 2

    # ... and four lines here.
    goodbye_lines = kwic(goodbye_buddy_doc)[0]
    assert len(goodbye_lines) == 4
|
||||
@@ -0,0 +1,10 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into sentences and return them nested in a list.

    Sentences are delimited by ``"."`` when ``periodsToBreaks`` is true
    (the periods themselves are dropped by ``str.split``) and by line breaks
    otherwise. An empty document yields ``[]``. ``listPairs`` and
    ``ignoreWords`` are accepted but not used yet.
    """
    if not document:
        return []

    sentences = document.split(".") if periodsToBreaks else document.splitlines()
    return [sentences]
|
||||
@@ -0,0 +1,25 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input must not produce empty output.
    assert kwic(design_words_doc) != []

    # Two input lines give two sentences.
    design_sentences = kwic(design_words_doc)[0]
    assert len(design_sentences) == 2

    # Four input lines give four sentences.
    goodbye_sentences = kwic(goodbye_buddy_doc)[0]
    assert len(goodbye_sentences) == 4

    # Without line breaks the whole text is a single sentence by default.
    unbroken = kwic(hello_buddy_periods)[0]
    assert len(unbroken) == 1

    # Breaking on periods instead turns the same text into four sentences.
    by_periods = kwic(hello_buddy_periods, periodsToBreaks=True)[0]
    assert len(by_periods) == 4
|
||||
@@ -0,0 +1,29 @@
|
||||
def split_by_periods(document):
    """Split *document* into sentences that keep their closing period.

    Newline characters are removed; every other character, including the
    whitespace that follows a period, is preserved. Any text left after the
    last period is returned as a final, unterminated sentence.
    """
    pieces = document.replace("\n", "").split(".")

    # Every piece except the last one was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]

    leftover = pieces[-1]
    if leftover:
        sentences.append(leftover)

    return sentences
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return the sentences of *document* as a flat list of strings.

    Sentences are produced by ``split_by_periods`` when ``periodsToBreaks``
    is true and by ``str.splitlines`` otherwise. An empty document yields
    ``[]``. ``listPairs`` and ``ignoreWords`` are accepted but not used yet.
    """
    if document:
        if periodsToBreaks:
            return split_by_periods(document)
        return document.splitlines()

    return []
|
||||
@@ -0,0 +1,29 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

hello_buddy_periods_output = [
    "Hello there.",
    " Hello there, buddy.",
    " Hello and goodbye, buddy.",
    " Hello is like buddy Goodbye!",
]

if __name__ == "__main__":
    # Empty input gives empty output.
    assert kwic(empty_document) == []

    # Real input must not produce empty output.
    assert kwic(design_words_doc) != []

    # Two input lines give two sentences.
    assert len(kwic(design_words_doc)) == 2

    # Four input lines give four sentences.
    assert len(kwic(goodbye_buddy_doc)) == 4

    # Text without line breaks comes back unchanged as the only sentence.
    first_sentence = kwic(hello_buddy_periods)[0]
    assert first_sentence == hello_buddy_periods

    # Breaking on periods gives four sentences, and each one keeps its
    # closing period.
    by_periods = kwic(hello_buddy_periods, periodsToBreaks=True)
    assert by_periods == hello_buddy_periods_output
|
||||
@@ -0,0 +1,44 @@
|
||||
def split_by_periods(document):
    """Break *document* into period-terminated sentences.

    Newlines are discarded before splitting. Each returned sentence ends with
    its period; a trailing fragment without a period is returned as the last
    item. Whitespace after a period stays at the start of the next sentence.
    """
    remaining = document.replace("\n", "")
    sentences = []

    while remaining:
        # partition() leaves `period` empty when no "." is left, so the final
        # fragment is appended as-is and the loop ends.
        head, period, remaining = remaining.partition(".")
        sentences.append(head + period)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Turn each sentence into a ``(words, index)`` tuple.

    ``words`` is the list of non-empty tokens obtained by splitting the
    sentence on single spaces; ``index`` is the sentence's position in
    *sentence_array*.
    """
    # A list comprehension is used instead of filter(): on Python 3 filter()
    # returns a lazy iterator, which can be neither sliced nor compared with
    # a list.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return *document* as a list of ``(words, sentence_index)`` tuples.

    Sentences come from ``split_by_periods`` when ``periodsToBreaks`` is true
    and from ``str.splitlines`` otherwise; ``split_by_word_as_tuples`` then
    splits them into words. An empty document yields ``[]``. ``listPairs``
    and ``ignoreWords`` are accepted but not used yet.
    """
    if not document:
        return []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    return split_by_word_as_tuples(sentences)
|
||||
@@ -0,0 +1,33 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Without line breaks the whole text is one sentence with index 0.
hello_buddy_periods_output = [(["Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and", "goodbye,", "buddy.",
                                "Hello", "is", "like", "buddy", "Goodbye!"], 0)]

# Breaking on periods gives four sentences, indexed in order.
hello_buddy_word_tuples_output = [(["Hello", "there."], 0),
                                  (["Hello", "there,", "buddy."], 1),
                                  (["Hello", "and", "goodbye,", "buddy."], 2),
                                  (["Hello", "is", "like", "buddy", "Goodbye!"], 3)]

if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic(empty_document) == []

    # Ensure real input does not produce empty output
    assert kwic(design_words_doc) != []

    # Make sure it's broken into two lines
    assert len(kwic(design_words_doc)) == 2

    # Make sure it's broken into four lines
    assert len(kwic(goodbye_buddy_doc)) == 4

    # Make sure kwic returns exactly the expected word tuples. Plain equality
    # is used because any(...) would pass as soon as a single element matched.
    assert kwic(hello_buddy_periods) == hello_buddy_periods_output

    # Same check when periods are used as the sentence breaks.
    assert kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_word_tuples_output
|
||||
@@ -0,0 +1,61 @@
|
||||
def split_by_periods(document):
    """Cut *document* into sentences at each period.

    Newline characters are dropped first. Every sentence keeps its closing
    period, whitespace after a period is kept at the start of the following
    sentence, and text after the last period becomes a final sentence.
    """
    flattened = "".join(document.split("\n"))
    sentences = []

    start = 0
    period_at = flattened.find(".")
    while period_at != -1:
        # Slice up to and including the period.
        sentences.append(flattened[start:period_at + 1])
        start = period_at + 1
        period_at = flattened.find(".", start)

    tail = flattened[start:]
    if tail:
        sentences.append(tail)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Pair every sentence's word list with the sentence's position.

    Each sentence is split on single spaces and empty tokens (from leading,
    trailing or repeated spaces) are discarded. Returns a list of
    ``(words, index)`` tuples in input order.
    """
    output_array = []

    for index, sentence in enumerate(sentence_array):
        # Build a real list: on Python 3 filter() would hand back a lazy
        # iterator that cannot be sliced or compared with a list.
        words = [word for word in sentence.split(" ") if word]
        output_array.append((words, index))

    return output_array
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Return a new sequence with the first *rotate_val* items moved to the end.

    The input is not modified; the result is built from two slices.
    """
    leading = input_array[:rotate_val]
    trailing = input_array[rotate_val:]
    return trailing + leading
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every ``(words, index)`` entry into all of its circular shifts.

    For each entry, one ``(shifted_words, index)`` tuple is produced per word,
    starting with the unshifted original (offset 0). Entries with no words
    contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _ in enumerate(entry[0])
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return ``(shifts, pairs)`` for *document*.

    ``shifts`` lists every circular shift of every sentence as
    ``(words, sentence_index)`` tuples; ``pairs`` is always an empty list in
    this iteration. Sentences are split on periods when ``periodsToBreaks``
    is true and on line breaks otherwise. An empty document yields
    ``([], [])``. ``listPairs`` and ``ignoreWords`` are accepted but unused.
    """
    if not document:
        return [], []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    shifts = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return shifts, []
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of "This is something split by periods." (sentence 0)
# followed by every circular shift of "Not newlines." (sentence 1).
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic.kwic(empty_document) == ([], [])

    # Ensure real input does not produce empty output
    assert kwic.kwic(design_words_doc) != ([], [])

    # Two three-word lines give six circular shifts in total
    assert len(kwic.kwic(design_words_doc)[0]) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts
    assert len(kwic.kwic(goodbye_buddy_doc)[0]) == 14

    # Check the period splitter on its own. Only the newline characters are
    # removed, so the spaces on both sides of the second newline survive as a
    # double space in the expected sentence.
    assert (kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
            ["This is something split by periods.", " Not  newlines."])

    # This checks to make sure a sentence gets split into words properly
    assert (kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
            [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])

    # These check to make sure that the circular shift function is rotating properly
    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
            ["One", "Two", "Three", "Four", "Five"])

    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
            ["Three", "Four", "Five", "One", "Two"])

    # This checks to make sure that circularly shifted versions of all sentences are correct
    assert (kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
                                                         (['Not', 'newlines.'], 1)]) ==
            this_is_split_periods_circular_output)

    # End-to-end sanity check: the circular shifts are right and the output
    # is wrapped in the (shifts, pairs) tuple.
    assert (kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
            (this_is_split_periods_circular_output, []))
|
||||
@@ -0,0 +1,64 @@
|
||||
def split_by_periods(document):
    """Split *document* into sentences that keep their closing period.

    Newline characters are removed; every other character, including the
    whitespace that follows a period, is preserved. Any text left after the
    last period is returned as a final, unterminated sentence.
    """
    pieces = document.replace("\n", "").split(".")

    # Every piece except the last one was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]

    leftover = pieces[-1]
    if leftover:
        sentences.append(leftover)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Turn each sentence into a ``(words, index)`` tuple.

    ``words`` is the list of non-empty tokens obtained by splitting the
    sentence on single spaces; ``index`` is the sentence's position in
    *sentence_array*.
    """
    # A list comprehension is used instead of filter(): on Python 3 filter()
    # returns a lazy iterator, which can be neither sliced nor compared with
    # a list.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Rotate *input_array* so that the item at *rotate_val* comes first.

    Returns a new sequence made of the slice from *rotate_val* onward followed
    by the slice before it; the input itself is left untouched.
    """
    back, front = input_array[:rotate_val], input_array[rotate_val:]
    return front + back
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Return every circular shift of every ``(words, index)`` entry.

    Each entry yields one ``(shifted_words, index)`` tuple per word, the
    first being the original order (offset 0). Entries whose word list is
    empty add nothing to the result.
    """
    shifted_entries = []

    for entry in sentence_array:
        words, sentence_index = entry[0], entry[1]
        shifted_entries.extend(
            (array_circular_shift(words, offset), sentence_index)
            for offset, _ in enumerate(words)
        )

    return shifted_entries
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circular-shift index for *document*.

    Sentences are split on periods when ``periodsToBreaks`` is true and on
    ``'\\n'`` otherwise; every sentence is then expanded into all of its
    circular shifts as ``(words, sentence_index)`` tuples.

    Returns the list of shifts, or ``(shifts, pairs)`` when ``listPairs`` is
    true. ``pairs`` is always an empty list in this iteration, and
    ``ignoreWords`` is accepted but unused.
    """
    if not document:
        # Keep the return shape consistent with the non-empty case, so that
        # callers asking for pairs always receive a two-item tuple.
        return ([], []) if listPairs else []

    if periodsToBreaks:
        sentences = split_by_periods(document)
    else:
        sentences = document.split('\n')

    shifts = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))

    return (shifts, []) if listPairs else shifts
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of "This is something split by periods." (sentence 0)
# followed by every circular shift of "Not newlines." (sentence 1).
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic.kwic(empty_document) == []

    # Ensure real input does not produce empty output
    assert kwic.kwic(design_words_doc) != []

    # Two three-word lines give six circular shifts in total
    assert len(kwic.kwic(design_words_doc)) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Check the period splitter on its own. Only the newline characters are
    # removed, so the spaces on both sides of the second newline survive as a
    # double space in the expected sentence.
    assert (kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
            ["This is something split by periods.", " Not  newlines."])

    # This checks to make sure a sentence gets split into words properly
    assert (kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
            [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])

    # These check to make sure that the circular shift function is rotating properly
    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
            ["One", "Two", "Three", "Four", "Five"])

    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
            ["Three", "Four", "Five", "One", "Two"])

    # This checks to make sure that circularly shifted versions of all sentences are correct
    assert (kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
                                                         (['Not', 'newlines.'], 1)]) ==
            this_is_split_periods_circular_output)

    # End-to-end sanity check: the circular shifts are right and, without
    # listPairs, the output is the plain list of shifts.
    assert (kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
            this_is_split_periods_circular_output)
|
||||
@@ -0,0 +1,64 @@
|
||||
def split_by_periods(document):
    """Break *document* into period-terminated sentences.

    Newlines are discarded before splitting. Each returned sentence ends with
    its period; a trailing fragment without a period is returned as the last
    item. Whitespace after a period stays at the start of the next sentence.
    """
    remaining = document.replace("\n", "")
    sentences = []

    while remaining:
        # partition() leaves `period` empty when no "." is left, so the final
        # fragment is appended as-is and the loop ends.
        head, period, remaining = remaining.partition(".")
        sentences.append(head + period)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Pair every sentence's word list with the sentence's position.

    Each sentence is split on single spaces and empty tokens (from leading,
    trailing or repeated spaces) are discarded. Returns a list of
    ``(words, index)`` tuples in input order.
    """
    output_array = []

    for index, sentence in enumerate(sentence_array):
        # Build a real list: on Python 3 filter() would hand back a lazy
        # iterator that cannot be sliced or compared with a list.
        words = [word for word in sentence.split(" ") if word]
        output_array.append((words, index))

    return output_array
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Return a new sequence with the first *rotate_val* items moved to the end.

    The input is not modified; the result is built from two slices.
    """
    leading = input_array[:rotate_val]
    trailing = input_array[rotate_val:]
    return trailing + leading
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Expand every ``(words, index)`` entry into all of its circular shifts.

    For each entry, one ``(shifted_words, index)`` tuple is produced per word,
    starting with the unshifted original (offset 0). Entries with no words
    contribute nothing.
    """
    return [
        (array_circular_shift(entry[0], offset), entry[1])
        for entry in sentence_array
        for offset, _ in enumerate(entry[0])
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build the circular-shift index for *document*.

    Sentences are split on periods when ``periodsToBreaks`` is true and on
    line breaks otherwise; every sentence is then expanded into all of its
    circular shifts as ``(words, sentence_index)`` tuples.

    Returns the list of shifts, or ``(shifts, pairs)`` when ``listPairs`` is
    true. ``pairs`` is always an empty list in this iteration, and
    ``ignoreWords`` is accepted but unused.
    """
    if not document:
        # Keep the return shape consistent with the non-empty case, so that
        # callers asking for pairs always receive a two-item tuple.
        return ([], []) if listPairs else []

    if periodsToBreaks:
        sentences = split_by_periods(document)
    else:
        sentences = document.splitlines()

    shifts = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))

    return (shifts, []) if listPairs else shifts
|
||||
@@ -0,0 +1,31 @@
|
||||
1. How did you decide what to test first? Would your final code change significantly if you changed the order of tests?
|
||||
|
||||
The process for deciding what to test mostly had to do with a logical breakdown of the core parts of the code so that
|
||||
things that relied on others to work were done after those pre-requisites. So, for example, before we could do anything
|
||||
first the document had to be broken down into sentences, then words. In that part, you could also change whether or not
|
||||
the sentence breaks were handled with newlines or periods. Afterwards, you could then deal with rearranging the words as
|
||||
necessary, handling determining pairs, and then excluding any words that were given as arguments. I don't feel like the
|
||||
code would have been massively different if it'd been done in reverse, but it definitely would have been more difficult
|
||||
to write. On top of that, you'd still have to think all the way through how the code would need to function, regardless
|
||||
of the order in which you wrote it, in order to know what needed to be written in the first place. Also, if you
|
||||
literally reversed the order of the tests, it could lead to you skipping the whole point of using tests (if you did it
|
||||
incorrectly). Again, for an example, if you wrote a test that produced the final output of the program as the initial
|
||||
input and expected a correct output, you'd either have to hard-code many, many values into the resulting code while you
|
||||
implemented them for real, or you'd end up writing all necessary code at once, which would defeat the purpose.
|
||||
|
||||
2. What did you think of test driven development, for this problem? What are the strengths and weaknesses of the
|
||||
approach? Does it encourage/discourage certain kinds of program designs?
|
||||
|
||||
I'm torn about it. I feel that for this particular problem, it may have been overkill and actually hampered design
|
||||
progress at times. The main issue was that for each major step, the output of the code often changed enough that many
|
||||
of the tests had to be modified or rewritten to match. The definite strengths of this approach are that you can easily
|
||||
tell when you change something that breaks the functionality of your code, and it will immediately produce an exception.
|
||||
On the flip side, it does take much longer to write functional code, depending on what exactly you're writing. I would
|
||||
argue that the larger the application being written, the more important it would be to do this kind of testing.
|
||||
Without it, you might be digging through hundreds of thousands to millions of lines of code to figure out what is
|
||||
breaking. I can definitely appreciate the fact that taking the time to write the tests helps guarantee that it's doing
|
||||
what you want, but I feel like it would be more useful in other programming contexts, like the one just mentioned.
|
||||
I'd say this style of coding encourages program designs that have a consistent output even if the underlying code
|
||||
changes often. For example, I feel like it'd be ideal for developing an API where once a standard has been created,
|
||||
that API should function identically for a long time, even if the backend is changing constantly. In general, this way
|
||||
of programming also encourages full coverage of edge cases, as you'll easily and quickly be able to test for them.
|
||||
@@ -0,0 +1,2 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Return the keyword-in-context index for *document*.

    This first iteration is a stub: every argument is accepted and ignored,
    and the result is always an empty list.

    ``ignoreWords`` defaults to ``None`` instead of ``[]`` so that no mutable
    list object is shared between calls.
    """
    return []
|
||||
@@ -0,0 +1,5 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None):
    """Return *document* wrapped in a list, or ``[]`` when it is empty.

    ``listPairs`` and ``ignoreWords`` are accepted but not used yet.
    ``ignoreWords`` defaults to ``None`` instead of ``[]`` so that no mutable
    list object is shared between calls.
    """
    if not document:
        return []

    return [document]
|
||||
@@ -0,0 +1,7 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into lines and return them nested in a list.

    An empty document yields ``[]``; otherwise the result is a one-element
    list whose only item is the list of lines. ``listPairs``, ``ignoreWords``
    and ``periodsToBreaks`` are accepted but not used yet.
    """
    if document:
        lines = document.splitlines()
        return [lines]

    return []
|
||||
@@ -0,0 +1,10 @@
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Split *document* into sentences and return them nested in a list.

    Sentences are delimited by ``"."`` when ``periodsToBreaks`` is true
    (the periods themselves are dropped by ``str.split``) and by line breaks
    otherwise. An empty document yields ``[]``. ``listPairs`` and
    ``ignoreWords`` are accepted but not used yet.
    """
    if not document:
        return []

    sentences = document.split(".") if periodsToBreaks else document.splitlines()
    return [sentences]
|
||||
@@ -0,0 +1,29 @@
|
||||
def split_by_periods(document):
    """Cut *document* into sentences at each period.

    Newline characters are dropped first. Every sentence keeps its closing
    period, whitespace after a period is kept at the start of the following
    sentence, and text after the last period becomes a final sentence.
    """
    flattened = "".join(document.split("\n"))
    sentences = []

    start = 0
    period_at = flattened.find(".")
    while period_at != -1:
        # Slice up to and including the period.
        sentences.append(flattened[start:period_at + 1])
        start = period_at + 1
        period_at = flattened.find(".", start)

    tail = flattened[start:]
    if tail:
        sentences.append(tail)

    return sentences
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return the sentences of *document* as a flat list of strings.

    Sentences are produced by ``split_by_periods`` when ``periodsToBreaks``
    is true and by ``str.splitlines`` otherwise. An empty document yields
    ``[]``. ``listPairs`` and ``ignoreWords`` are accepted but not used yet.
    """
    if document:
        if periodsToBreaks:
            return split_by_periods(document)
        return document.splitlines()

    return []
|
||||
@@ -0,0 +1,44 @@
|
||||
def split_by_periods(document):
    """Split *document* into sentences that keep their closing period.

    Newline characters are removed; every other character, including the
    whitespace that follows a period, is preserved. Any text left after the
    last period is returned as a final, unterminated sentence.
    """
    pieces = document.replace("\n", "").split(".")

    # Every piece except the last one was followed by a period in the input.
    sentences = [piece + "." for piece in pieces[:-1]]

    leftover = pieces[-1]
    if leftover:
        sentences.append(leftover)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Turn each sentence into a ``(words, index)`` tuple.

    ``words`` is the list of non-empty tokens obtained by splitting the
    sentence on single spaces; ``index`` is the sentence's position in
    *sentence_array*.
    """
    # A list comprehension is used instead of filter(): on Python 3 filter()
    # returns a lazy iterator, which can be neither sliced nor compared with
    # a list.
    return [
        ([word for word in sentence.split(" ") if word], index)
        for index, sentence in enumerate(sentence_array)
    ]
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return *document* as a list of ``(words, sentence_index)`` tuples.

    Sentences come from ``split_by_periods`` when ``periodsToBreaks`` is true
    and from ``str.splitlines`` otherwise; ``split_by_word_as_tuples`` then
    splits them into words. An empty document yields ``[]``. ``listPairs``
    and ``ignoreWords`` are accepted but not used yet.
    """
    if not document:
        return []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    return split_by_word_as_tuples(sentences)
|
||||
@@ -0,0 +1,61 @@
|
||||
def split_by_periods(document):
    """Break *document* into period-terminated sentences.

    Newlines are discarded before splitting. Each returned sentence ends with
    its period; a trailing fragment without a period is returned as the last
    item. Whitespace after a period stays at the start of the next sentence.
    """
    remaining = document.replace("\n", "")
    sentences = []

    while remaining:
        # partition() leaves `period` empty when no "." is left, so the final
        # fragment is appended as-is and the loop ends.
        head, period, remaining = remaining.partition(".")
        sentences.append(head + period)

    return sentences
|
||||
|
||||
|
||||
def split_by_word_as_tuples(sentence_array):
    """Pair every sentence's word list with the sentence's position.

    Each sentence is split on single spaces and empty tokens (from leading,
    trailing or repeated spaces) are discarded. Returns a list of
    ``(words, index)`` tuples in input order.
    """
    output_array = []

    for index, sentence in enumerate(sentence_array):
        # Build a real list: on Python 3 filter() would hand back a lazy
        # iterator that cannot be sliced or compared with a list.
        words = [word for word in sentence.split(" ") if word]
        output_array.append((words, index))

    return output_array
|
||||
|
||||
|
||||
def array_circular_shift(input_array, rotate_val):
    """Rotate *input_array* so that the item at *rotate_val* comes first.

    Returns a new sequence made of the slice from *rotate_val* onward followed
    by the slice before it; the input itself is left untouched.
    """
    back, front = input_array[:rotate_val], input_array[rotate_val:]
    return front + back
|
||||
|
||||
|
||||
def fill_with_circular_shifts_and_original(sentence_array):
    """Return every circular shift of every ``(words, index)`` entry.

    Each entry yields one ``(shifted_words, index)`` tuple per word, the
    first being the original order (offset 0). Entries whose word list is
    empty add nothing to the result.
    """
    shifted_entries = []

    for entry in sentence_array:
        words, sentence_index = entry[0], entry[1]
        shifted_entries.extend(
            (array_circular_shift(words, offset), sentence_index)
            for offset, _ in enumerate(words)
        )

    return shifted_entries
|
||||
|
||||
|
||||
def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Return ``(shifts, pairs)`` for *document*.

    ``shifts`` lists every circular shift of every sentence as
    ``(words, sentence_index)`` tuples; ``pairs`` is always an empty list in
    this iteration. Sentences are split on periods when ``periodsToBreaks``
    is true and on line breaks otherwise. An empty document yields
    ``([], [])``. ``listPairs`` and ``ignoreWords`` are accepted but unused.
    """
    if not document:
        return [], []

    sentences = split_by_periods(document) if periodsToBreaks else document.splitlines()
    shifts = fill_with_circular_shifts_and_original(split_by_word_as_tuples(sentences))
    return shifts, []
|
||||
Binary file not shown.
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
design_words_doc = "Design is hard.\nLet's just implement."
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"

# Every circular shift of "This is something split by periods." (sentence 0)
# followed by every circular shift of "Not newlines." (sentence 1).
this_is_split_periods_circular_output = [
    (['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
    (['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
    (['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
    (['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
    (['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
    (['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
    (['Not', 'newlines.'], 1),
    (['newlines.', 'Not'], 1)
]

if __name__ == "__main__":
    # Ensure empty input gives empty output
    assert kwic.kwic(empty_document) == []

    # Ensure real input does not produce empty output
    assert kwic.kwic(design_words_doc) != []

    # Two three-word lines give six circular shifts in total
    assert len(kwic.kwic(design_words_doc)) == 6

    # Four lines of 2, 3, 4 and 5 words give fourteen circular shifts
    assert len(kwic.kwic(goodbye_buddy_doc)) == 14

    # Check the period splitter on its own. Only the newline characters are
    # removed, so the spaces on both sides of the second newline survive as a
    # double space in the expected sentence.
    assert (kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
            ["This is something split by periods.", " Not  newlines."])

    # This checks to make sure a sentence gets split into words properly
    assert (kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
            [(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])

    # These check to make sure that the circular shift function is rotating properly
    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
            ["One", "Two", "Three", "Four", "Five"])

    assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
            ["Three", "Four", "Five", "One", "Two"])

    # This checks to make sure that circularly shifted versions of all sentences are correct
    assert (kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
                                                         (['Not', 'newlines.'], 1)]) ==
            this_is_split_periods_circular_output)

    # End-to-end sanity check: the circular shifts are right and, without
    # listPairs, the output is the plain list of shifts.
    assert (kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
            this_is_split_periods_circular_output)
|
||||
@@ -0,0 +1,6 @@
|
||||
from kwic import kwic
|
||||
|
||||
document = ""  # Input no data

if __name__ == "__main__":
    # With no data to index, the result has to be empty.
    result = kwic(document)
    assert result == []
|
||||
@@ -0,0 +1,8 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert(kwic(empty_document) == []) # Ensure empty input gives empty output
|
||||
assert(kwic(design_words_doc) != []) # Ensure real input does not produce empty output
|
||||
@@ -0,0 +1,12 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert(kwic(empty_document) == []) # Ensure empty input gives empty output
|
||||
assert(kwic(design_words_doc) != []) # Ensure real input does not produce empty output
|
||||
|
||||
assert(len(kwic(design_words_doc)[0]) == 2) # [(), ()] Make sure it's broken into two lines
|
||||
assert(len(kwic(goodbye_buddy_doc)[0]) == 4) # [(), ()] Make sure it's broken into four lines
|
||||
@@ -0,0 +1,25 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ensure empty input gives empty output
|
||||
assert(kwic(empty_document) == [])
|
||||
|
||||
# Ensure real input does not produce empty output
|
||||
assert(kwic(design_words_doc) != [])
|
||||
|
||||
# Make sure it's broken into two lines
|
||||
assert(len(kwic(design_words_doc)[0]) == 2)
|
||||
|
||||
# Make sure it's broken into four lines
|
||||
assert(len(kwic(goodbye_buddy_doc)[0]) == 4)
|
||||
|
||||
# Make sure line with just periods only shows up as one normally
|
||||
assert(len(kwic(hello_buddy_periods)[0]) == 1)
|
||||
|
||||
# Make sure it's broken into four lines once it's broken by periods instead
|
||||
assert(len(kwic(hello_buddy_periods, periodsToBreaks=True)[0]) == 4)
|
||||
@@ -0,0 +1,29 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
|
||||
|
||||
hello_buddy_periods_output = ["Hello there.", " Hello there, buddy.", " Hello and goodbye, buddy.",
|
||||
" Hello is like buddy Goodbye!"]
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ensure empty input gives empty output
|
||||
assert(kwic(empty_document) == [])
|
||||
|
||||
# Ensure real input does not produce empty output
|
||||
assert(kwic(design_words_doc) != [])
|
||||
|
||||
# Make sure it's broken into two lines
|
||||
assert(len(kwic(design_words_doc)) == 2)
|
||||
|
||||
# Make sure it's broken into four lines
|
||||
assert(len(kwic(goodbye_buddy_doc)) == 4)
|
||||
|
||||
# Make sure line with just periods shows up as itself
|
||||
assert(kwic(hello_buddy_periods)[0] == hello_buddy_periods)
|
||||
|
||||
# Make sure it's broken into four lines once it's broken by periods instead
|
||||
# Also, this time it keeps the ending period like it's supposed to
|
||||
assert(kwic(hello_buddy_periods, periodsToBreaks=True) == hello_buddy_periods_output)
|
||||
@@ -0,0 +1,33 @@
|
||||
from kwic import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
|
||||
|
||||
hello_buddy_periods_output = [(["Hello", "there.", "Hello", "there,", "buddy.", "Hello", "and", "goodbye,", "buddy.",
|
||||
"Hello", "is", "like", "buddy", "Goodbye!"], 0)]
|
||||
|
||||
hello_buddy_word_tuples_output = [(["Hello", "there."], 0),
|
||||
(["Hello", "there,", "buddy."], 1),
|
||||
(["Hello", "and", "goodbye,", "buddy."], 2),
|
||||
(["Hello", "is", "like", "buddy", "Goodbye!"], 3)]
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ensure empty input gives empty output
|
||||
assert(kwic(empty_document) == [])
|
||||
|
||||
# Ensure real input does not produce empty output
|
||||
assert(kwic(design_words_doc) != [])
|
||||
|
||||
# Make sure it's broken into two lines
|
||||
assert(len(kwic(design_words_doc)) == 2)
|
||||
|
||||
# Make sure it's broken into four lines
|
||||
assert(len(kwic(goodbye_buddy_doc)) == 4)
|
||||
|
||||
# Make sure at least one expected element from the test output appears in the kwic result (any, not all)
|
||||
assert(any(x in kwic(hello_buddy_periods) for x in hello_buddy_periods_output))
|
||||
|
||||
# Make sure at least one expected tuple appears in the kwic result when periods are used as breaks (any, not all)
|
||||
assert(any(x in kwic(hello_buddy_periods, periodsToBreaks=True) for x in hello_buddy_word_tuples_output))
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
|
||||
|
||||
this_is_split_periods_circular_output = [
|
||||
(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
|
||||
(['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
|
||||
(['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
|
||||
(['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
|
||||
(['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
|
||||
(['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
|
||||
(['Not', 'newlines.'], 1),
|
||||
(['newlines.', 'Not'], 1)
|
||||
]
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ensure empty input gives empty output
|
||||
assert(kwic.kwic(empty_document) == ([], []))
|
||||
|
||||
# Ensure real input does not produce empty output
|
||||
assert(kwic.kwic(design_words_doc) != ([], []))
|
||||
|
||||
# Make sure the two lines expand into six circular shifts (three words each)
|
||||
assert(len(kwic.kwic(design_words_doc)[0]) == 6)
|
||||
|
||||
# Make sure the four lines expand into 14 circular shifts (2 + 3 + 4 + 5 words)
|
||||
assert(len(kwic.kwic(goodbye_buddy_doc)[0]) == 14)
|
||||
|
||||
# Just realized I can check each individual function, so here is the check to split by periods
|
||||
assert(kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
|
||||
["This is something split by periods.", " Not newlines."])
|
||||
|
||||
# This checks to make sure a sentence gets split into words properly
|
||||
assert(kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
|
||||
[(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])
|
||||
|
||||
# These check to make sure that the circular shift function is rotating properly
|
||||
assert(kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
|
||||
["One", "Two", "Three", "Four", "Five"])
|
||||
|
||||
assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
|
||||
["Three", "Four", "Five", "One", "Two"])
|
||||
|
||||
# This checks to make sure that circularly shifted versions of all sentences are correct
|
||||
assert(kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
|
||||
(['Not', 'newlines.'], 1)]) ==
|
||||
this_is_split_periods_circular_output)
|
||||
|
||||
# This gives a bit of a sanity check. It's making sure the circular shift works, and that the output is formatted
|
||||
# correctly...
|
||||
assert(kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
|
||||
(this_is_split_periods_circular_output, []))
|
||||
@@ -0,0 +1,55 @@
|
||||
import kwic
|
||||
|
||||
empty_document = ""
|
||||
design_words_doc = "Design is hard.\nLet's just implement."
|
||||
goodbye_buddy_doc = "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!"
|
||||
hello_buddy_periods = "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!"
|
||||
|
||||
this_is_split_periods_circular_output = [
|
||||
(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
|
||||
(['is', 'something', 'split', 'by', 'periods.', 'This'], 0),
|
||||
(['something', 'split', 'by', 'periods.', 'This', 'is'], 0),
|
||||
(['split', 'by', 'periods.', 'This', 'is', 'something'], 0),
|
||||
(['by', 'periods.', 'This', 'is', 'something', 'split'], 0),
|
||||
(['periods.', 'This', 'is', 'something', 'split', 'by'], 0),
|
||||
(['Not', 'newlines.'], 1),
|
||||
(['newlines.', 'Not'], 1)
|
||||
]
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ensure empty input gives empty output
|
||||
assert(kwic.kwic(empty_document) == ([]))
|
||||
|
||||
# Ensure real input does not produce empty output
|
||||
assert(kwic.kwic(design_words_doc) != ([]))
|
||||
|
||||
# Make sure the two lines expand into six circular shifts (three words each)
|
||||
assert(len(kwic.kwic(design_words_doc)) == 6)
|
||||
|
||||
# Make sure the four lines expand into 14 circular shifts (2 + 3 + 4 + 5 words)
|
||||
assert(len(kwic.kwic(goodbye_buddy_doc)) == 14)
|
||||
|
||||
# Just realized I can check each individual function, so here is the check to split by periods
|
||||
assert(kwic.split_by_periods("This is something split \nby periods. Not \n newlines.") ==
|
||||
["This is something split by periods.", " Not newlines."])
|
||||
|
||||
# This checks to make sure a sentence gets split into words properly
|
||||
assert(kwic.split_by_word_as_tuples(["This is something split by periods.", " Not newlines."]) ==
|
||||
[(['This', 'is', 'something', 'split', 'by', 'periods.'], 0), (['Not', 'newlines.'], 1)])
|
||||
|
||||
# These check to make sure that the circular shift function is rotating properly
|
||||
assert(kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 0) ==
|
||||
["One", "Two", "Three", "Four", "Five"])
|
||||
|
||||
assert (kwic.array_circular_shift(["One", "Two", "Three", "Four", "Five"], 2) ==
|
||||
["Three", "Four", "Five", "One", "Two"])
|
||||
|
||||
# This checks to make sure that circularly shifted versions of all sentences are correct
|
||||
assert(kwic.fill_with_circular_shifts_and_original([(['This', 'is', 'something', 'split', 'by', 'periods.'], 0),
|
||||
(['Not', 'newlines.'], 1)]) ==
|
||||
this_is_split_periods_circular_output)
|
||||
|
||||
# This gives a bit of a sanity check. It's making sure the circular shift works, and that the output is formatted
|
||||
# correctly...
|
||||
assert(kwic.kwic("This is something split \nby periods. Not \n newlines.", periodsToBreaks=True) ==
|
||||
this_is_split_periods_circular_output)
|
||||
@@ -0,0 +1,6 @@
|
||||
1) ignoreWords only applies to indexing (the shifts) -- it has no impact on listPairs.
|
||||
2) In listPairs, a "pair" is two (different) words that appear together in a "line" -- they may not be next to each other, they are just in the same line.
|
||||
3) The list of pairs should be structured as a list of tuples of tuples, not lists. I'll fix the bad example in the assignment.
|
||||
4) Submit files as a flat folder structure!!! kwic0.py testkwic0.py
|
||||
5) Use assert to test. It must throw an uncaught exception.
|
||||
6) The test testkwicN should pass kwicN, but not if you ran it against a newer
|
||||
@@ -0,0 +1,10 @@
|
||||
from mykwic import *
|
||||
from pprint import pprint
|
||||
|
||||
for l in open("tocheck.txt"):
|
||||
print "="*50
|
||||
input = l[:-1]
|
||||
print "INPUT:",input
|
||||
v = eval("kwic("+input+")")
|
||||
print "OUTPUT:"
|
||||
pprint
|
||||
@@ -0,0 +1,326 @@
|
||||
==================================================
|
||||
INPUT: "Design is hard.\nLet's just implement."
|
||||
OUTPUT:
|
||||
[(['Design', 'is', 'hard.'], 0),
|
||||
(['hard.', 'Design', 'is'], 0),
|
||||
(['implement.', "Let's", 'just'], 1),
|
||||
(['is', 'hard.', 'Design'], 0),
|
||||
(['just', 'implement.', "Let's"], 1),
|
||||
(["Let's", 'just', 'implement.'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "Design is hard.\nLet's just implement.", ignoreWords=["is"]
|
||||
OUTPUT:
|
||||
[(['Design', 'is', 'hard.'], 0),
|
||||
(['hard.', 'Design', 'is'], 0),
|
||||
(['implement.', "Let's", 'just'], 1),
|
||||
(['just', 'implement.', "Let's"], 1),
|
||||
(["Let's", 'just', 'implement.'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "Design is hard.\nLet's just implement.", ignoreWords=["is"], listPairs=True
|
||||
OUTPUT:
|
||||
([(['Design', 'is', 'hard.'], 0),
|
||||
(['hard.', 'Design', 'is'], 0),
|
||||
(['implement.', "Let's", 'just'], 1),
|
||||
(['just', 'implement.', "Let's"], 1),
|
||||
(["Let's", 'just', 'implement.'], 1)],
|
||||
[])
|
||||
|
||||
==================================================
|
||||
INPUT: "Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!", listPairs=True
|
||||
OUTPUT:
|
||||
([(['and', 'goodbye,', 'buddy.', 'Hello'], 2),
|
||||
(['buddy', 'Goodbye!', 'Hello', 'is', 'like'], 3),
|
||||
(['buddy.', 'Hello', 'and', 'goodbye,'], 2),
|
||||
(['buddy.', 'Hello', 'there,'], 1),
|
||||
(['Goodbye!', 'Hello', 'is', 'like', 'buddy'], 3),
|
||||
(['goodbye,', 'buddy.', 'Hello', 'and'], 2),
|
||||
(['Hello', 'and', 'goodbye,', 'buddy.'], 2),
|
||||
(['Hello', 'is', 'like', 'buddy', 'Goodbye!'], 3),
|
||||
(['Hello', 'there,', 'buddy.'], 1),
|
||||
(['Hello', 'there.'], 0),
|
||||
(['is', 'like', 'buddy', 'Goodbye!', 'Hello'], 3),
|
||||
(['like', 'buddy', 'Goodbye!', 'Hello', 'is'], 3),
|
||||
(['there,', 'buddy.', 'Hello'], 1),
|
||||
(['there.', 'Hello'], 0)],
|
||||
[(('buddy', 'goodbye'), 2),
|
||||
(('buddy', 'hello'), 3),
|
||||
(('goodbye', 'hello'), 2),
|
||||
(('hello', 'there'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: "Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!", listPairs=True, periodsToBreaks=True
|
||||
OUTPUT:
|
||||
([(['and', 'goodbye,', 'buddy.', 'Hello'], 2),
|
||||
(['buddy', 'Goodbye!', 'Hello', 'is', 'like'], 3),
|
||||
(['buddy.', 'Hello', 'and', 'goodbye,'], 2),
|
||||
(['buddy.', 'Hello', 'there,'], 1),
|
||||
(['Goodbye!', 'Hello', 'is', 'like', 'buddy'], 3),
|
||||
(['goodbye,', 'buddy.', 'Hello', 'and'], 2),
|
||||
(['Hello', 'and', 'goodbye,', 'buddy.'], 2),
|
||||
(['Hello', 'is', 'like', 'buddy', 'Goodbye!'], 3),
|
||||
(['Hello', 'there,', 'buddy.'], 1),
|
||||
(['Hello', 'there.'], 0),
|
||||
(['is', 'like', 'buddy', 'Goodbye!', 'Hello'], 3),
|
||||
(['like', 'buddy', 'Goodbye!', 'Hello', 'is'], 3),
|
||||
(['there,', 'buddy.', 'Hello'], 1),
|
||||
(['there.', 'Hello'], 0)],
|
||||
[(('buddy', 'goodbye'), 2),
|
||||
(('buddy', 'hello'), 3),
|
||||
(('goodbye', 'hello'), 2),
|
||||
(('hello', 'there'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: ". . a"
|
||||
OUTPUT:
|
||||
[(['.', '.', 'a'], 0), (['.', 'a', '.'], 0), (['a', '.', '.'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: ". . a", periodsToBreaks=True
|
||||
OUTPUT:
|
||||
[(['.', '.', 'a'], 0), (['.', 'a', '.'], 0), (['a', '.', '.'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: ". A B\n. A B C\n. A B C D", listPairs=True
|
||||
OUTPUT:
|
||||
([(['.', 'A', 'B'], 0),
|
||||
(['.', 'A', 'B', 'C'], 1),
|
||||
(['.', 'A', 'B', 'C', 'D'], 2),
|
||||
(['A', 'B', '.'], 0),
|
||||
(['A', 'B', 'C', '.'], 1),
|
||||
(['A', 'B', 'C', 'D', '.'], 2),
|
||||
(['B', '.', 'A'], 0),
|
||||
(['B', 'C', '.', 'A'], 1),
|
||||
(['B', 'C', 'D', '.', 'A'], 2),
|
||||
(['C', '.', 'A', 'B'], 1),
|
||||
(['C', 'D', '.', 'A', 'B'], 2),
|
||||
(['D', '.', 'A', 'B', 'C'], 2)],
|
||||
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: "Hello world. This is a test\nhopefully it turns out okay", periodsToBreaks = True
|
||||
OUTPUT:
|
||||
[(['a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is'], 1),
|
||||
(['Hello', 'world.'], 0),
|
||||
(['hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is', 'a', 'test'], 1),
|
||||
(['is', 'a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This'], 1),
|
||||
(['it', 'turns', 'out', 'okay', 'This', 'is', 'a', 'test', 'hopefully'], 1),
|
||||
(['okay', 'This', 'is', 'a', 'test', 'hopefully', 'it', 'turns', 'out'], 1),
|
||||
(['out', 'okay', 'This', 'is', 'a', 'test', 'hopefully', 'it', 'turns'], 1),
|
||||
(['test', 'hopefully', 'it', 'turns', 'out', 'okay', 'This', 'is', 'a'], 1),
|
||||
(['This', 'is', 'a', 'test', 'hopefully', 'it', 'turns', 'out', 'okay'], 1),
|
||||
(['turns', 'out', 'okay', 'This', 'is', 'a', 'test', 'hopefully', 'it'], 1),
|
||||
(['world.', 'Hello'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "It's very nice to be footloose. \nWith just a toothbrush and a comb.\n"
|
||||
OUTPUT:
|
||||
[(['a', 'comb.', 'With', 'just', 'a', 'toothbrush', 'and'], 1),
|
||||
(['a', 'toothbrush', 'and', 'a', 'comb.', 'With', 'just'], 1),
|
||||
(['and', 'a', 'comb.', 'With', 'just', 'a', 'toothbrush'], 1),
|
||||
(['be', 'footloose.', "It's", 'very', 'nice', 'to'], 0),
|
||||
(['comb.', 'With', 'just', 'a', 'toothbrush', 'and', 'a'], 1),
|
||||
(['footloose.', "It's", 'very', 'nice', 'to', 'be'], 0),
|
||||
(["It's", 'very', 'nice', 'to', 'be', 'footloose.'], 0),
|
||||
(['just', 'a', 'toothbrush', 'and', 'a', 'comb.', 'With'], 1),
|
||||
(['nice', 'to', 'be', 'footloose.', "It's", 'very'], 0),
|
||||
(['to', 'be', 'footloose.', "It's", 'very', 'nice'], 0),
|
||||
(['toothbrush', 'and', 'a', 'comb.', 'With', 'just', 'a'], 1),
|
||||
(['very', 'nice', 'to', 'be', 'footloose.', "It's"], 0),
|
||||
(['With', 'just', 'a', 'toothbrush', 'and', 'a', 'comb.'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "It's very nice to be footloose. \nWith just a toothbrush and a comb.\n", periodsToBreaks=True
|
||||
OUTPUT:
|
||||
[(['a', 'comb.', 'With', 'just', 'a', 'toothbrush', 'and'], 1),
|
||||
(['a', 'toothbrush', 'and', 'a', 'comb.', 'With', 'just'], 1),
|
||||
(['and', 'a', 'comb.', 'With', 'just', 'a', 'toothbrush'], 1),
|
||||
(['be', 'footloose.', "It's", 'very', 'nice', 'to'], 0),
|
||||
(['comb.', 'With', 'just', 'a', 'toothbrush', 'and', 'a'], 1),
|
||||
(['footloose.', "It's", 'very', 'nice', 'to', 'be'], 0),
|
||||
(["It's", 'very', 'nice', 'to', 'be', 'footloose.'], 0),
|
||||
(['just', 'a', 'toothbrush', 'and', 'a', 'comb.', 'With'], 1),
|
||||
(['nice', 'to', 'be', 'footloose.', "It's", 'very'], 0),
|
||||
(['to', 'be', 'footloose.', "It's", 'very', 'nice'], 0),
|
||||
(['toothbrush', 'and', 'a', 'comb.', 'With', 'just', 'a'], 1),
|
||||
(['very', 'nice', 'to', 'be', 'footloose.', "It's"], 0),
|
||||
(['With', 'just', 'a', 'toothbrush', 'and', 'a', 'comb.'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "hello here, hello there, hello everywhere",listPairs = True
|
||||
OUTPUT:
|
||||
([(['everywhere', 'hello', 'here,', 'hello', 'there,', 'hello'], 0),
|
||||
(['hello', 'everywhere', 'hello', 'here,', 'hello', 'there,'], 0),
|
||||
(['hello', 'here,', 'hello', 'there,', 'hello', 'everywhere'], 0),
|
||||
(['hello', 'there,', 'hello', 'everywhere', 'hello', 'here,'], 0),
|
||||
(['here,', 'hello', 'there,', 'hello', 'everywhere', 'hello'], 0),
|
||||
(['there,', 'hello', 'everywhere', 'hello', 'here,', 'hello'], 0)],
|
||||
[])
|
||||
|
||||
==================================================
|
||||
INPUT: "hello here\nhello here again\nhello again", listPairs=True
|
||||
OUTPUT:
|
||||
([(['again', 'hello'], 2),
|
||||
(['again', 'hello', 'here'], 1),
|
||||
(['hello', 'again'], 2),
|
||||
(['hello', 'here'], 0),
|
||||
(['hello', 'here', 'again'], 1),
|
||||
(['here', 'again', 'hello'], 1),
|
||||
(['here', 'hello'], 0)],
|
||||
[(('again', 'hello'), 2), (('hello', 'here'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: "hello hello hello\nhello hello", listPairs=True
|
||||
OUTPUT:
|
||||
([(['hello', 'hello'], 1),
|
||||
(['hello', 'hello'], 1),
|
||||
(['hello', 'hello', 'hello'], 0),
|
||||
(['hello', 'hello', 'hello'], 0),
|
||||
(['hello', 'hello', 'hello'], 0)],
|
||||
[])
|
||||
|
||||
==================================================
|
||||
INPUT: "to be or not to be", listPairs=True
|
||||
OUTPUT:
|
||||
([(['be', 'or', 'not', 'to', 'be', 'to'], 0),
|
||||
(['be', 'to', 'be', 'or', 'not', 'to'], 0),
|
||||
(['not', 'to', 'be', 'to', 'be', 'or'], 0),
|
||||
(['or', 'not', 'to', 'be', 'to', 'be'], 0),
|
||||
(['to', 'be', 'or', 'not', 'to', 'be'], 0),
|
||||
(['to', 'be', 'to', 'be', 'or', 'not'], 0)],
|
||||
[])
|
||||
|
||||
==================================================
|
||||
INPUT: ". A B\n. A B C\n. A B C D", listPairs=True
|
||||
OUTPUT:
|
||||
([(['.', 'A', 'B'], 0),
|
||||
(['.', 'A', 'B', 'C'], 1),
|
||||
(['.', 'A', 'B', 'C', 'D'], 2),
|
||||
(['A', 'B', '.'], 0),
|
||||
(['A', 'B', 'C', '.'], 1),
|
||||
(['A', 'B', 'C', 'D', '.'], 2),
|
||||
(['B', '.', 'A'], 0),
|
||||
(['B', 'C', '.', 'A'], 1),
|
||||
(['B', 'C', 'D', '.', 'A'], 2),
|
||||
(['C', '.', 'A', 'B'], 1),
|
||||
(['C', 'D', '.', 'A', 'B'], 2),
|
||||
(['D', '.', 'A', 'B', 'C'], 2)],
|
||||
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: "a bad\ncat barks."
|
||||
OUTPUT:
|
||||
[(['a', 'bad'], 0),
|
||||
(['bad', 'a'], 0),
|
||||
(['barks.', 'cat'], 1),
|
||||
(['cat', 'barks.'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "This is not a sentence.\nNeither is this.",ignoreWords=["is."]
|
||||
OUTPUT:
|
||||
[(['a', 'sentence.', 'This', 'is', 'not'], 0),
|
||||
(['is', 'not', 'a', 'sentence.', 'This'], 0),
|
||||
(['is', 'this.', 'Neither'], 1),
|
||||
(['Neither', 'is', 'this.'], 1),
|
||||
(['not', 'a', 'sentence.', 'This', 'is'], 0),
|
||||
(['sentence.', 'This', 'is', 'not', 'a'], 0),
|
||||
(['This', 'is', 'not', 'a', 'sentence.'], 0),
|
||||
(['this.', 'Neither', 'is'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "This is not a sentence.\nNeither is this.",ignoreWords=["is"]
|
||||
OUTPUT:
|
||||
[(['a', 'sentence.', 'This', 'is', 'not'], 0),
|
||||
(['Neither', 'is', 'this.'], 1),
|
||||
(['not', 'a', 'sentence.', 'This', 'is'], 0),
|
||||
(['sentence.', 'This', 'is', 'not', 'a'], 0),
|
||||
(['This', 'is', 'not', 'a', 'sentence.'], 0),
|
||||
(['this.', 'Neither', 'is'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "hello hello\nhello hello"
|
||||
OUTPUT:
|
||||
[(['hello', 'hello'], 0),
|
||||
(['hello', 'hello'], 0),
|
||||
(['hello', 'hello'], 1),
|
||||
(['hello', 'hello'], 1)]
|
||||
|
||||
==================================================
|
||||
INPUT: "#!good morning", ignoreWords = ['!good']
|
||||
OUTPUT:
|
||||
[(['#!good', 'morning'], 0), (['morning', '#!good'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "go!od morning-!", ignoreWords = ['good']
|
||||
OUTPUT:
|
||||
[(['morning-!', 'go!od'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "#!good morning-!", ignoreWords = ['!GoOd']
|
||||
OUTPUT:
|
||||
[(['#!good', 'morning-!'], 0), (['morning-!', '#!good'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "?!good morning-!", ignoreWords = ['!GoOd']
|
||||
OUTPUT:
|
||||
[(['?!good', 'morning-!'], 0), (['morning-!', '?!good'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "?!go!!!od morning-!", ignoreWords = ['!GoOd']
|
||||
OUTPUT:
|
||||
[(['?!go!!!od', 'morning-!'], 0), (['morning-!', '?!go!!!od'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: 'This pair? is good.\n So is this pair and that pair',listPairs=True
|
||||
OUTPUT:
|
||||
([(['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),
|
||||
(['good.', 'This', 'pair?', 'is'], 0),
|
||||
(['is', 'good.', 'This', 'pair?'], 0),
|
||||
(['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),
|
||||
(['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),
|
||||
(['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),
|
||||
(['pair?', 'is', 'good.', 'This'], 0),
|
||||
(['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),
|
||||
(['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),
|
||||
(['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),
|
||||
(['This', 'pair?', 'is', 'good.'], 0)],
|
||||
[(('is', 'pair'), 2), (('is', 'this'), 2), (('pair', 'this'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: "CS is cool"
|
||||
OUTPUT:
|
||||
[(['cool', 'CS', 'is'], 0),
|
||||
(['CS', 'is', 'cool'], 0),
|
||||
(['is', 'cool', 'CS'], 0)]
|
||||
|
||||
==================================================
|
||||
INPUT: "a b\na b c\na b c d", listPairs=True
|
||||
OUTPUT:
|
||||
([(['a', 'b'], 0),
|
||||
(['a', 'b', 'c'], 1),
|
||||
(['a', 'b', 'c', 'd'], 2),
|
||||
(['b', 'a'], 0),
|
||||
(['b', 'c', 'a'], 1),
|
||||
(['b', 'c', 'd', 'a'], 2),
|
||||
(['c', 'a', 'b'], 1),
|
||||
(['c', 'd', 'a', 'b'], 2),
|
||||
(['d', 'a', 'b', 'c'], 2)],
|
||||
[(('a', 'b'), 3), (('a', 'c'), 2), (('b', 'c'), 2)])
|
||||
|
||||
==================================================
|
||||
INPUT: 'This pair? is good.\n So is this pair and that pair', listPairs=True
|
||||
OUTPUT:
|
||||
([(['and', 'that', 'pair', 'So', 'is', 'this', 'pair'], 1),
|
||||
(['good.', 'This', 'pair?', 'is'], 0),
|
||||
(['is', 'good.', 'This', 'pair?'], 0),
|
||||
(['is', 'this', 'pair', 'and', 'that', 'pair', 'So'], 1),
|
||||
(['pair', 'and', 'that', 'pair', 'So', 'is', 'this'], 1),
|
||||
(['pair', 'So', 'is', 'this', 'pair', 'and', 'that'], 1),
|
||||
(['pair?', 'is', 'good.', 'This'], 0),
|
||||
(['So', 'is', 'this', 'pair', 'and', 'that', 'pair'], 1),
|
||||
(['that', 'pair', 'So', 'is', 'this', 'pair', 'and'], 1),
|
||||
(['this', 'pair', 'and', 'that', 'pair', 'So', 'is'], 1),
|
||||
(['This', 'pair?', 'is', 'good.'], 0)],
|
||||
[(('is', 'pair'), 2), (('is', 'this'), 2), (('pair', 'this'), 2)])
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
"Design is hard.\nLet's just implement."
|
||||
"Design is hard.\nLet's just implement.", ignoreWords=["is"]
|
||||
"Design is hard.\nLet's just implement.", ignoreWords=["is"], listPairs=True
|
||||
"Hello there.\nHello there, buddy.\nHello and goodbye, buddy.\nHello is like buddy Goodbye!", listPairs=True
|
||||
"Hello there. Hello there, buddy. Hello and goodbye, buddy. Hello is like buddy Goodbye!", listPairs=True, periodsToBreaks=True
|
||||
". . a"
|
||||
". . a", periodsToBreaks=True
|
||||
". A B\n. A B C\n. A B C D", listPairs=True
|
||||
"Hello world. This is a test\nhopefully it turns out okay", periodsToBreaks = True
|
||||
"It's very nice to be footloose. \nWith just a toothbrush and a comb.\n"
|
||||
"It's very nice to be footloose. \nWith just a toothbrush and a comb.\n", periodsToBreaks=True
|
||||
"hello here, hello there, hello everywhere",listPairs = True
|
||||
"hello here\nhello here again\nhello again", listPairs=True
|
||||
"hello hello hello\nhello hello", listPairs=True
|
||||
"to be or not to be", listPairs=True
|
||||
". A B\n. A B C\n. A B C D", listPairs=True
|
||||
"a bad\ncat barks."
|
||||
"This is not a sentence.\nNeither is this.",ignoreWords=["is."]
|
||||
"This is not a sentence.\nNeither is this.",ignoreWords=["is"]
|
||||
"hello hello\nhello hello"
|
||||
"#!good morning", ignoreWords = ['!good']
|
||||
"go!od morning-!", ignoreWords = ['good']
|
||||
"#!good morning-!", ignoreWords = ['!GoOd']
|
||||
"?!good morning-!", ignoreWords = ['!GoOd']
|
||||
"?!go!!!od morning-!", ignoreWords = ['!GoOd']
|
||||
'This pair? is good.\n So is this pair and that pair',listPairs=True
|
||||
"CS is cool"
|
||||
"a b\na b c\na b c d", listPairs=True
|
||||
'This pair? is good.\n So is this pair and that pair', listPairs=True
|
||||
Reference in New Issue
Block a user