# school_archives/OSU Coursework/CS 361 - Software Engineering I/Assignment 2/fastkwic.py

# Punctuation ignored when matching ignore-words and when pairing words.
_PUNCTUATION = ".,?!:"


def alphabetized_key(input_data):
    """Return the case-insensitive sort key for a ``(words, tag)`` tuple.

    Args:
        input_data: A sequence whose first element is an iterable of
            strings (for example a circular shift and its line number, or
            a word pair and its count).

    Returns:
        A list holding the lowercase form of every string in
        ``input_data[0]``.  A real list is returned (not a lazy ``map``
        object) so the keys can be compared with each other when sorting
        on both Python 2 and Python 3.
    """
    return [word.lower() for word in input_data[0]]


def _split_on_periods(document):
    """Split *document* into sentences at sentence-ending periods.

    A period ends a sentence when it is the first or last character of the
    document, or when it follows a lowercase letter and is followed by
    whitespace.  The breaking period is kept at the end of its sentence,
    newlines are replaced by single spaces, and any other period is kept
    as an ordinary character.
    """
    sentences = []
    current = []
    last_index = len(document) - 1

    for index, char in enumerate(document):
        if char == '\n':
            current.append(" ")
        elif char != '.':
            current.append(char)
        elif (index == 0 or index == last_index or
              (document[index - 1].islower() and
               document[index + 1].isspace())):
            current.append(char)
            sentences.append("".join(current))
            current = []
        else:
            # Not a sentence break (e.g. "U.S." or "3.14"): keep the period
            # instead of silently dropping it from the text.
            current.append(char)

    if current:
        sentences.append("".join(current))
    return sentences


def _strip_punctuation(word):
    """Return *word* with every punctuation character removed."""
    # Portable replacement for the Python-2-only ``word.translate(None, chars)``.
    return "".join(char for char in word if char not in _PUNCTUATION)


def _count_shared_pairs(numbered_lines):
    """Return ``((first, second), count)`` for pairs found in 2+ lines.

    Words are lowercased and stripped of punctuation before pairing.  Each
    pair is ordered alphabetically (``first < second``) and is counted at
    most once per line; words that are empty after stripping are ignored.
    The result is sorted by pair.
    """
    pair_counts = {}

    for words, _ in numbered_lines:
        unique_words = {_strip_punctuation(word.lower()) for word in words}
        unique_words.discard("")
        ordered = sorted(unique_words)

        # Every 2-combination of the sorted unique words is exactly one
        # alphabetically ordered pair, so each is counted once per line.
        for position, first in enumerate(ordered):
            for second in ordered[position + 1:]:
                pair = (first, second)
                pair_counts[pair] = pair_counts.get(pair, 0) + 1

    shared = [(pair, count) for pair, count in pair_counts.items() if count > 1]
    shared.sort(key=alphabetized_key)
    return shared


def kwic(document, listPairs=False, ignoreWords=None, periodsToBreaks=False):
    """Build a keyword-in-context (KWIC) index of *document*.

    The document is split into lines, every line is split into words on
    single spaces, and every circular shift of every line is produced.
    The shifts are returned sorted case-insensitively.

    Args:
        document: The text to index.
        listPairs: When true, also return the word pairs that occur
            together in more than one line.
        ignoreWords: Optional iterable of words.  Shifts whose first word
            (lowercased, with leading/trailing punctuation stripped)
            matches one of them, case-insensitively, are omitted.
        periodsToBreaks: When true, lines are delimited by sentence-ending
            periods instead of newlines (see ``_split_on_periods``).

    Returns:
        A list of ``(words, line_number)`` tuples, where ``words`` is a
        list of strings and ``line_number`` is the zero-based index of the
        originating line.  When ``listPairs`` is true, a 2-tuple of that
        list and a list of ``((first, second), count)`` tuples.
    """
    if not document:
        return ([], []) if listPairs else []

    if periodsToBreaks:
        lines = _split_on_periods(document)
    else:
        lines = document.split('\n')

    # Splitting on a single space can yield empty strings for runs of
    # spaces; those are discarded.  Lists (not lazy iterators) are built so
    # the words can be sliced below on Python 3 as well.
    numbered_lines = [
        ([word for word in line.split(" ") if word], line_number)
        for line_number, line in enumerate(lines)
    ]

    shifts = [
        (words[start:] + words[:start], line_number)
        for words, line_number in numbered_lines
        for start in range(len(words))
    ]

    if ignoreWords:
        ignored = {word.lower() for word in ignoreWords}
        # Build a new list rather than removing while iterating, which
        # skipped the element following each removed shift.
        shifts = [
            shift for shift in shifts
            if shift[0][0].lower().strip(_PUNCTUATION) not in ignored
        ]

    alphabetized_data = sorted(shifts, key=alphabetized_key)

    if listPairs:
        # Pairs are computed from every word of every line; ignoreWords
        # only filters the shifts, not the pair statistics.
        return alphabetized_data, _count_shared_pairs(numbered_lines)
    return alphabetized_data