mirror of
https://github.com/caperren/school_archives.git
synced 2025-11-09 21:51:15 +00:00
Added work from my other class repositories before deletion
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,58 @@
|
||||
def shift(line):
    """Return every circular rotation of ``line``.

    ``line`` may be any sliceable sequence (a list of words, a string, ...).
    The result has ``len(line)`` entries; entry ``i`` is ``line`` rotated
    left by ``i`` positions, so entry 0 equals ``line`` itself.  An empty
    sequence yields an empty list.
    """
    # range() instead of the Python 2-only xrange() so this runs on 2 and 3.
    return [line[i:] + line[:i] for i in range(len(line))]
|
||||
|
||||
def cleanWord(word):
    """Return ``word`` lower-cased with the characters . , ? ! : removed.

    The result is always a string.  The previous ``filter()``-based version
    only returned a string on Python 2; on Python 3 it produced a lazy
    iterator, which cannot be compared, hashed by value, or passed to
    ``len()`` the way callers expect.
    """
    punctuation = (".", ",", "?", "!", ":")
    return "".join(c for c in word.lower() if c not in punctuation)
|
||||
|
||||
def ignorable(word, ignoreWords):
    """Tell whether ``word`` matches one of ``ignoreWords``.

    ``word`` is normalised with ``cleanWord`` (lower-cased, punctuation
    stripped); the ignore words are only lower-cased before the comparison.
    """
    loweredIgnores = [candidate.lower() for candidate in ignoreWords]
    return cleanWord(word) in loweredIgnores
|
||||
|
||||
def splitBreaks(string, periodsToBreaks):
    """Split ``string`` into the lines that are indexed separately.

    When ``periodsToBreaks`` is false the text is split on newline
    characters.  Otherwise a break is made in front of every space that
    directly follows a period which itself directly follows a lowercase
    ASCII letter (``"...end. Next"``).  The period stays with the line
    before the break, and the space becomes the first character of the
    line after it.  Newlines are not treated specially in this mode.

    Always returns a list with at least one element.
    """
    if not periodsToBreaks:
        return string.split("\n")

    # A frozenset works on Python 2 and 3 (no xrange) and gives O(1) lookups.
    breakChars = frozenset("abcdefghijklmnopqrstuvwxyz")
    lines = []
    start = 0
    for i, c in enumerate(string):
        # Look two characters back; i >= 2 guarantees both exist.
        if (c == " " and i >= 2 and string[i - 1] == "."
                and string[i - 2] in breakChars):
            # Slice instead of growing a string one character at a time.
            lines.append(string[start:i])
            start = i
    lines.append(string[start:])
    return lines
|
||||
|
||||
|
||||
def kwic(string, ignoreWords=(), listPairs=False, periodsToBreaks=False):
    """Build a keyword-in-context index for ``string``.

    The text is split into lines with ``splitBreaks`` and each line into
    whitespace-separated words.  Every circular rotation of every line is
    produced with ``shift``; rotations whose first word is ``ignorable`` are
    dropped, and the rest are sorted.

    Parameters:
        string          -- the text to index.
        ignoreWords     -- iterable of words that may not lead a rotation
                           (compared case-insensitively by ``ignorable``).
        listPairs       -- when true, also report word pairs that occur
                           together in more than one line.
        periodsToBreaks -- forwarded to ``splitBreaks``; chooses period-based
                           rather than newline-based line splitting.

    Returns:
        If ``listPairs`` is false: a list of ``(rotatedWords, lineIndex)``
        tuples sorted by the cleaned words, then by line index.

        If ``listPairs`` is true: a tuple ``(index, pairCounts)``.  ``index``
        is the same kind of list, but sorted by the lower-cased words, then
        by line index.  ``pairCounts`` is a sorted list of
        ``((word1, word2), count)`` with ``word1 < word2``, where ``count``
        is the number of lines containing both cleaned words; only pairs with
        ``count > 1`` are listed.
    """
    lines = splitBreaks(string, periodsToBreaks)
    splitLines = [l.split() for l in lines]

    pairs = {}
    if listPairs:
        for words in splitLines:
            # Each distinct cleaned word counts once per line; words that
            # clean down to nothing (pure punctuation) never form a pair.
            distinct = set(cleanWord(w) for w in words)
            distinct.discard("")
            ordered = sorted(distinct)
            # Sorted unique words give every pair with first < second
            # exactly once per line.
            for pos, first in enumerate(ordered):
                for second in ordered[pos + 1:]:
                    key = (first, second)
                    pairs[key] = pairs.get(key, 0) + 1

    # One (rotation, lineIndex) entry per rotation, in line order.
    rotations = [(rotation, lineIndex)
                 for lineIndex, words in enumerate(splitLines)
                 for rotation in shift(words)]
    kept = [entry for entry in rotations
            if not ignorable(entry[0][0], ignoreWords)]

    # Sort keys must be real lists: on Python 3 map() returns an iterator,
    # which cannot be ordered.
    if not listPairs:
        return sorted(kept,
                      key=lambda entry: ([cleanWord(w) for w in entry[0]],
                                         entry[1]))

    # NOTE(review): this branch sorts on lower-cased words only (punctuation
    # kept), unlike the branch above which sorts on cleanWord(); preserved
    # as-is -- confirm whether the difference is intended.
    index = sorted(kept,
                   key=lambda entry: ([w.lower() for w in entry[0]],
                                      entry[1]))
    repeated = sorted(wp for wp in pairs if pairs[wp] > 1)
    return (index, [(wp, pairs[wp]) for wp in repeated])
|
||||
@@ -0,0 +1,20 @@
|
||||
- One version that has improved performance, and one to improve testability
|
||||
- Performance
|
||||
-- Faster by a good margin than the baseline version
|
||||
- Testability
|
||||
-- Make it easier to control and/or observe the behavior of the system
|
||||
-- Ideas
|
||||
--- Interfaces for controlling internal variables
|
||||
--- Copious assertions
|
||||
--- Limiting complexity
|
||||
--- Changes that make the code timing more consistent
|
||||
|
||||
|
||||
- Files (put in folder named perrenc361assign2.zip)
|
||||
-- kwic.py : baseline version
|
||||
-- fastkwic.py : better performance version
|
||||
-- fastarch.txt : describes changes in architectural terms that made it faster, and how you tested this (400 words)
|
||||
-- fastarch.pdf : diagram of architecture
|
||||
-- testkwic.py : highly testable version of kwic
|
||||
-- testarch.txt : describes changes in architectural terms that made it more testable (200 words)
|
||||
-- testarch.pdf : diagram of architecture
|
||||
Reference in New Issue
Block a user