|
| 1 | +importsys |
| 2 | + |
| 3 | +classLetter: |
| 4 | +def__init__(self,letter,freq): |
| 5 | +self.letter=letter |
| 6 | +self.freq=freq |
| 7 | +self.bitstring="" |
| 8 | + |
| 9 | +def__repr__(self): |
| 10 | +returnf'{self.letter}:{self.freq}' |
| 11 | + |
| 12 | + |
| 13 | +classTreeNode: |
| 14 | +def__init__(self,freq,left,right): |
| 15 | +self.freq=freq |
| 16 | +self.left=left |
| 17 | +self.right=right |
| 18 | + |
| 19 | + |
| 20 | +defparse_file(file_path): |
| 21 | +""" |
| 22 | + Read the file and build a dict of all letters and their |
| 23 | + frequences, then convert the dict into a list of Letters. |
| 24 | + """ |
| 25 | +chars= {} |
| 26 | +withopen(file_path)asf: |
| 27 | +whileTrue: |
| 28 | +c=f.read(1) |
| 29 | +ifnotc: |
| 30 | +break |
| 31 | +chars[c]=chars[c]+1ifcinchars.keys()else1 |
| 32 | +letters= [] |
| 33 | +forchar,freqinchars.items(): |
| 34 | +letter=Letter(char,freq) |
| 35 | +letters.append(letter) |
| 36 | +letters.sort(key=lambdal:l.freq) |
| 37 | +returnletters |
| 38 | + |
| 39 | +defbuild_tree(letters): |
| 40 | +""" |
| 41 | + Run through the list of Letters and build the min heap |
| 42 | + for the Huffman Tree. |
| 43 | + """ |
| 44 | +whilelen(letters)>1: |
| 45 | +left=letters.pop(0) |
| 46 | +right=letters.pop(0) |
| 47 | +total_freq=left.freq+right.freq |
| 48 | +node=TreeNode(total_freq,left,right) |
| 49 | +letters.append(node) |
| 50 | +letters.sort(key=lambdal:l.freq) |
| 51 | +returnletters[0] |
| 52 | + |
| 53 | +deftraverse_tree(root,bitstring): |
| 54 | +""" |
| 55 | + Recursively traverse the Huffman Tree to set each |
| 56 | + Letter's bitstring, and return the list of Letters |
| 57 | + """ |
| 58 | +iftype(root)isLetter: |
| 59 | +root.bitstring=bitstring |
| 60 | +return [root] |
| 61 | +letters= [] |
| 62 | +letters+=traverse_tree(root.left,bitstring+"0") |
| 63 | +letters+=traverse_tree(root.right,bitstring+"1") |
| 64 | +returnletters |
| 65 | + |
| 66 | +defhuffman(file_path): |
| 67 | +""" |
| 68 | + Parse the file, build the tree, then run through the file |
| 69 | + again, using the list of Letters to find and print out the |
| 70 | + bitstring for each letter. |
| 71 | + """ |
| 72 | +letters_list=parse_file(file_path) |
| 73 | +root=build_tree(letters_list) |
| 74 | +letters=traverse_tree(root,"") |
| 75 | +print(f'Huffman Coding of{file_path}: ') |
| 76 | +withopen(file_path)asf: |
| 77 | +whileTrue: |
| 78 | +c=f.read(1) |
| 79 | +ifnotc: |
| 80 | +break |
| 81 | +le=list(filter(lambdal:l.letter==c,letters))[0] |
| 82 | +print(le.bitstring,end=" ") |
| 83 | +print() |
| 84 | + |
| 85 | +if__name__=="__main__": |
| 86 | +# pass the file path to the huffman function |
| 87 | +huffman(sys.argv[1]) |