mirror of
https://github.com/caperren/school_archives.git
synced 2025-11-09 21:51:15 +00:00
Added missing classes from final year at OSU
Binary file not shown.
@@ -0,0 +1,37 @@
##### To Run This Program #####

Run one of the lines below, replacing arguments as necessary:

./bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt

OR

python3 bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt

Both of the above commands have been tested with this program on the OSU Flip servers.
There is no guarantee that this will run on any computers other than these servers!
No makefile is needed for this program.

##### Answers to Questions #####

a.
The functions I chose were ripemd160, sha256, whirlpool, md5, and DSA. These are all cryptographic hashes. I chose
them because they are less likely to generate collisions than non-cryptographic ones (at the expense of being slower),
and because they are the ones built into the hashlib library for python3 on the flip servers, which guarantees
that the grader will be able to run the program without having to install or include additional libraries.
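
A quick way to confirm those names on a given machine (a sketch, not part of the submission; availability of
ripemd160, whirlpool, and DSA depends on the OpenSSL build behind hashlib):

    import hashlib

    chosen = ["ripemd160", "sha256", "whirlpool", "md5", "DSA"]
    for name in chosen:
        # algorithms_available lists every name accepted by hashlib.new()
        print(name, name in hashlib.algorithms_available)
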

b.
ripemd160: 0.000006914
sha256: 0.00001025
whirlpool: 0.00001121
md5: 0.00001025
DSA: 0.000009298

ripemd160 and DSA are the fastest, though not by much. They perform better because their algorithms compute the hash
more quickly than the others. It is also likely that their hash outputs are shorter than those of functions like
sha256, which are quite large.
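
The timing harness is not part of the submission, but numbers like these can be gathered with a sketch along these
lines (the sample input and trial count below are arbitrary placeholders):

    import hashlib
    import time

    names = ["ripemd160", "sha256", "whirlpool", "md5", "DSA"]
    sample = b"password123"  # any representative input
    trials = 100000

    for name in names:
        start = time.time()
        for _ in range(trials):
            hashlib.new(name, sample).hexdigest()
        # average seconds per hash over all trials
        print("%s: %.9f" % (name, (time.time() - start) / trials))
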

c.
The probability of false positives is 1%, as I set the hash bit array size to 5976456, which I calculated using a
dictionary size of 623518. The sizing equation for the bit array gives
m = -(n * ln(p)) / (ln(2))^2 = -(623518 * ln(0.01)) / (ln(2))^2, which is approximately 5976456.
The probability of false negatives is 0%. It is not possible to have a false negative with a Bloom filter.
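
The same calculation as a standalone sketch (not part of bloom_filter.py):

    import math

    n = 623518  # number of dictionary passwords
    p = 0.01    # target false positive probability

    # Bloom filter bit array sizing: m = -(n * ln p) / (ln 2)^2
    m = int(-(n * math.log(p)) / (math.log(2) ** 2))
    print(m)    # 5976456, the HASH_ARRAY_SIZE used in bloom_filter.py
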

d.
The rate of false positives can be reduced by increasing the number of storable positions in the hash bit array, or
by reducing the number of hash functions used in order to reduce collisions.

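To illustrate the first point with the standard approximation p = (1 - e^(-k*n/m))^k (again a sketch, not code from
the submission), doubling the bit array size sharply lowers the expected false positive rate of the five-hash filter:

    import math

    n, k = 623518, 5
    for m in (5976456, 2 * 5976456):
        p = (1 - math.exp(-k * n / m)) ** k
        print(m, round(p, 4))  # roughly 0.0111, then 0.0006
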
@@ -0,0 +1,125 @@
#!/usr/bin/env python3

# ##### Includes #####
# System includes
import sys
import getopt
import hashlib
import random
from time import time

# ##### Global Variables #####
USAGE_STRING = "usage: ./bloom_filter.py -d dictionary.txt -i input.txt -o output3.txt output5.txt"
NUM_ARGUMENTS_CORRECT = 7

HASH_ARRAY_SIZE = 5976456  # should be a 1% false positive rate


ARGUMENT_MAPPING = {
    "dictionary": 1,
    "input": 3,
    "three_hash": 5,
    "five_hash": 6
}

AVAILABLE_HASHES = [
    "ripemd160",
    "sha256",
    "whirlpool",
    "md5",
    "DSA"
]


# ##### Bloom Filter Class #####
class BloomFilter(object):
    def __init__(self, arguments):
        super(BloomFilter, self).__init__()

        if len(arguments) != NUM_ARGUMENTS_CORRECT:
            print(USAGE_STRING)
            sys.exit(2)

        self.dictionary_path = None
        self.input_file_path = None
        self.three_hash_output_path = None
        self.five_hash_output_path = None

        # Bit arrays (kept as position -> 0/1 dictionaries) for the three-hash
        # and five-hash filters
        self.three_hash_dictionary = {i: 0 for i in range(HASH_ARRAY_SIZE)}
        self.five_hash_dictionary = {i: 0 for i in range(HASH_ARRAY_SIZE)}

        self.dictionary_path = arguments[ARGUMENT_MAPPING["dictionary"]]
        self.input_file_path = arguments[ARGUMENT_MAPPING["input"]]
        self.three_hash_output_path = arguments[ARGUMENT_MAPPING["three_hash"]]
        self.five_hash_output_path = arguments[ARGUMENT_MAPPING["five_hash"]]

    def generate_filters(self):
        dictionary_file = open(self.dictionary_path, "r", encoding="latin-1")
        lines = dictionary_file.read().splitlines()

        print("Generating filter using \"%s\". This will take a few moments." % self.dictionary_path)

        # For every dictionary password, set the bit positions selected by each
        # hash in both the five-hash and three-hash filters
        for password in lines:
            clean_password = password.strip()
            for i in range(5):
                five_hasher = hashlib.new(AVAILABLE_HASHES[i])
                five_hasher.update(clean_password.encode())
                current_hash = int(five_hasher.hexdigest(), 16)

                self.five_hash_dictionary[current_hash % HASH_ARRAY_SIZE] = 1

            for i in range(3):
                three_hasher = hashlib.new(AVAILABLE_HASHES[i])
                three_hasher.update(clean_password.encode())
                current_hash = int(three_hasher.hexdigest(), 16)
                self.three_hash_dictionary[current_hash % HASH_ARRAY_SIZE] = 1

        print("Filter generation complete.")

        dictionary_file.close()

    def process_inputs_and_generate_outputs(self):
        input_file = open(self.input_file_path, "r", encoding="latin-1")
        lines = input_file.read().splitlines()

        output_file_three_hash = open(self.three_hash_output_path, "w")
        output_file_five_hash = open(self.five_hash_output_path, "w")

        print("Processing input file \"%s\" and writing outputs to \"%s\" and \"%s\"." %
              (self.input_file_path, self.three_hash_output_path, self.five_hash_output_path))

        # The first line of the input file is a count, so skip it
        for password in lines[1:]:
            in_set_three = True
            in_set_five = True

            clean_password = password.strip()
            for i in range(5):
                five_hasher = hashlib.new(AVAILABLE_HASHES[i])
                five_hasher.update(clean_password.encode())
                current_hash = int(five_hasher.hexdigest(), 16)

                # Any unset bit means the password is definitely not in the set
                if self.five_hash_dictionary[current_hash % HASH_ARRAY_SIZE] == 0:
                    in_set_five = False

            for i in range(3):
                three_hasher = hashlib.new(AVAILABLE_HASHES[i])
                three_hasher.update(clean_password.encode())
                current_hash = int(three_hasher.hexdigest(), 16)

                if self.three_hash_dictionary[current_hash % HASH_ARRAY_SIZE] == 0:
                    in_set_three = False

            output_file_three_hash.write("%s\n" % ("no" if not in_set_three else "maybe"))
            output_file_five_hash.write("%s\n" % ("no" if not in_set_five else "maybe"))

        print("Processing complete.")

        input_file.close()
        output_file_three_hash.close()
        output_file_five_hash.close()


# ##### Main #####
if __name__ == "__main__":
    bloom_filter = BloomFilter(sys.argv[1:])
    bloom_filter.generate_filters()
    bloom_filter.process_inputs_and_generate_outputs()

File diff suppressed because one or more lines are too long
@@ -0,0 +1,19 @@
17
*holly&ben
*homo*
*vanusa*
010605
010605
walton-dutch-luzon-post
012190
0121909334
maskflower
2,,{H99*X(
darry-bethel-cube-mess
masking
undersupplied
undersupplies
7^*.$?GC86
undersupply
9Ca5B>w8.Q}bhU=ss*sK
karl
@@ -0,0 +1,12 @@
no
maybe
no
no
no
maybe
no
maybe
maybe
maybe
no
maybe