Added missing classes from final year at OSU

This commit is contained in:
2019-06-17 14:04:15 -07:00
parent 8fa1ffb1b0
commit c717a0316f
166 changed files with 653934 additions and 308 deletions

View File

@@ -0,0 +1,37 @@
##### To Run This Program #####
Run one of the lines below replacing arguments as necessary:
./bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt
OR
python3 bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt
Both of the above commands have been tested on this program on the OSU Flip servers.
There is no guarantee that this will run on any computers other than these servers!!!
No makefile is needed for this program.
##### Answers to Questions #####
a.
The functions I chose were ripemd160, sha256, whirlpool, md5, and DSA. These are all cryptographic hashes. I chose
them because they are less likely to generate collisions than non-cryptographic ones (at the expense of being slower),
as well as because they are the ones built into the hashlib library for python3 on the flip servers, which guarantees
that the grader will be able to run the program without having to install or include additional libraries.
b.
ripemd160: 0.000006914
sha256: 0.00001025
whirlpool: 0.00001121
md5: 0.00001025
DSA: 0.000009298
ripemd160 and DSA are the fastest, though not by much. They perform better as their algorithms to generate the hash
compute more quickly than the others. It is also likely that the length of the hash output is shorter than ones like
sha256, which are quite large.
c.
The probability of false positives is 1% as I set the hash bit array size to 5976456, which I calculated using a
dictionary size of 623518. The result of the false positive equation -(623518 * log(0.01)) / (log(2))^2 = 5976456.
The probability of false negatives is 0%. It is not possible to have a false negative with a bloom filter.
d.
The rate of false positives can be reduced by increasing the number of storable positions for the hash bit array, or
by reducing the number of hash functions used to reduce collisions.

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
# ##### Includes #####
# System includes
import sys
import getopt
import hashlib
import random
from time import time
# ##### Global Variables #####
USAGE_STRING = "usage: ./bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt"
# Expected length of sys.argv[1:]: -d <file> -i <file> -o <file> <file> = 7 tokens.
NUM_ARGUMENTS_CORRECT = 7
HASH_ARRAY_SIZE = 5976456  # should be a 1% false positive rate
# Positional index of each value inside sys.argv[1:]; flags must appear in
# the fixed order shown in USAGE_STRING.
ARGUMENT_MAPPING = {
    "dictionary": 1,
    "input": 3,
    "three_hash": 5,
    "five_hash": 6,
}
# hashlib algorithm names; the first three also form the 3-hash filter set.
AVAILABLE_HASHES = [
    "ripemd160",
    "sha256",
    "whirlpool",
    "md5",
    "DSA",
]
# ##### Bloom Filter Class #####
class BloomFilter(object):
    """Bloom filter over a password dictionary.

    Maintains two independent filters: one probed with the first 3 hash
    algorithms in AVAILABLE_HASHES, one probed with all 5.
    """

    def __init__(self, arguments):
        """Parse positional command-line arguments.

        arguments: sys.argv[1:], expected in the exact order
        -d <dictionary> -i <input> -o <3-hash output> <5-hash output>.
        Prints the usage string and exits with status 2 on a length mismatch.
        """
        super(BloomFilter, self).__init__()
        if len(arguments) != NUM_ARGUMENTS_CORRECT:
            print(USAGE_STRING)
            sys.exit(2)
        # Values are taken purely by position (ARGUMENT_MAPPING); the flag
        # tokens themselves are never inspected, so order must match usage.
        self.dictionary_path = arguments[ARGUMENT_MAPPING["dictionary"]]
        self.input_file_path = arguments[ARGUMENT_MAPPING["input"]]
        self.three_hash_output_path = arguments[ARGUMENT_MAPPING["three_hash"]]
        self.five_hash_output_path = arguments[ARGUMENT_MAPPING["five_hash"]]
        # Bit arrays for the two filters. bytearray(n) is a zero-filled buffer
        # with O(1) indexing -- vastly cheaper than the original
        # {i: 0 for i in range(HASH_ARRAY_SIZE)} dicts, which allocated a dict
        # entry per slot (~6M slots each) just to hold 0/1 flags.
        self.three_hash_dictionary = bytearray(HASH_ARRAY_SIZE)
        self.five_hash_dictionary = bytearray(HASH_ARRAY_SIZE)

    def _hash_positions(self, password):
        """Return the 5 filter indices for *password*, in AVAILABLE_HASHES
        order; the first 3 double as the probes for the three-hash filter."""
        encoded = password.encode()
        positions = []
        for algorithm_name in AVAILABLE_HASHES:
            hasher = hashlib.new(algorithm_name)
            hasher.update(encoded)
            positions.append(int(hasher.hexdigest(), 16) % HASH_ARRAY_SIZE)
        return positions

    def generate_filters(self):
        """Populate both filters from every line of the dictionary file."""
        # with-statement guarantees the file is closed even on error.
        with open(self.dictionary_path, "r", encoding="latin-1") as dictionary_file:
            lines = dictionary_file.read().splitlines()
        print("Generating filter using \"%s\". This will take a few moments." % self.dictionary_path)
        for password in lines:
            # Each hash is computed exactly once; the original recomputed the
            # first 3 hashes in a separate loop for the three-hash filter.
            positions = self._hash_positions(password.strip())
            for position in positions:
                self.five_hash_dictionary[position] = 1
            for position in positions[:3]:
                self.three_hash_dictionary[position] = 1
        print("Filter generation complete.")

    def process_inputs_and_generate_outputs(self):
        """Test each input password against both filters and write one
        "no"/"maybe" verdict per line to the two output files."""
        with open(self.input_file_path, "r", encoding="latin-1") as input_file:
            lines = input_file.read().splitlines()
        print("Processing input file \"%s\" and writing outputs to \"%s\" and \"%s\"." %
              (self.input_file_path, self.three_hash_output_path, self.five_hash_output_path))
        with open(self.three_hash_output_path, "w") as output_file_three_hash, \
                open(self.five_hash_output_path, "w") as output_file_five_hash:
            # lines[0] is the password-count header, not a password -- skip it.
            for password in lines[1:]:
                positions = self._hash_positions(password.strip())
                # "maybe" only when every probed bit is set; one clear bit
                # proves absence (bloom filters have no false negatives).
                in_set_five = all(self.five_hash_dictionary[p] for p in positions)
                in_set_three = all(self.three_hash_dictionary[p] for p in positions[:3])
                output_file_three_hash.write("%s\n" % ("no" if not in_set_three else "maybe"))
                output_file_five_hash.write("%s\n" % ("no" if not in_set_five else "maybe"))
        print("Processing complete.")
# ##### Main #####
if __name__ == "__main__":
    # Build both filters from the dictionary, then classify the input file.
    filter_runner = BloomFilter(sys.argv[1:])
    filter_runner.generate_filters()
    filter_runner.process_inputs_and_generate_outputs()

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,19 @@
17
*holly&ben
*homo*
*vanusa*
010605
010605
walton-dutch-luzon-post
012190
0121909334
maskflower
2,,{H99*X(
darry-bethel-cube-mess
masking
undersupplied
undersupplies
7^*.$?GC86
undersupply
9Ca5B>w8.Q}bhU=ss*sK
karl

View File

@@ -0,0 +1,12 @@
no
maybe
no
no
no
maybe
no
maybe
maybe
maybe
no
maybe