"""Rank candidate plaintexts by the dictionary words found inside them.

The script loads a word list into a BinarySearchTree, reads one candidate
plaintext per line from ``plaintexts.txt``, scores every candidate with
``sigProb`` and writes the candidates to ``orderedtexts.txt``.

NOTE(review): the scanning code assumes ``tree.inside(s)`` reports whether
``s`` is a stored word and ``tree.startswith(s)`` whether some stored word
begins with ``s`` -- confirm against the BinarySearchTree module.
"""
import BinarySearchTree

# Build the dictionary tree from the (sorted) word list.
with open("wordlist.txt") as wordlistfile:
    words = wordlistfile.read()
wordlist = words.split("\n")
wordlist.sort()
tree = BinarySearchTree.BinarySearchTree()
tree.addSortedList(wordlist)

# One candidate plaintext per line.
with open("plaintexts.txt") as criptofile:
    plaintext = criptofile.read()
plaintextlist = plaintext.split("\n")

# Module-level counter kept for compatibility; nothing in this file reads it.
count = 0


def _find_longest_word(text):
    """Locate the first dictionary word in ``text``.

    Start positions are tried left to right.  For each start the slice is
    grown one character at a time; growth stops as soon as the slice is no
    longer accepted by ``tree.startswith``.  Once a slice is accepted by
    ``tree.inside`` it is extended as far as ``tree.startswith`` allows and
    then shrunk back to the longest slice that ``tree.inside`` accepts.

    Returns:
        A ``(start, end)`` tuple such that ``text[start:end]`` is the word,
        or ``None`` when no word is found.
    """
    length = len(text)
    for start in range(length):
        for end in range(start + 1, length + 1):
            candidate = text[start:end]
            if tree.inside(candidate):
                # Greedily extend.  ``end`` is clamped to ``length``: the
                # previous bound (``length + 1``) let the index run one past
                # the end of the string, which inflated every length-based
                # score for a word that finishes the string.
                while end < length and tree.startswith(text[start:end]):
                    end += 1
                # Back off to the longest slice that is a complete word.
                # Terminates because ``candidate`` itself is a word.
                while not tree.inside(text[start:end]):
                    end -= 1
                return start, end
            if not tree.startswith(candidate):
                # No stored word begins with this slice; try the next start.
                break
    return None


def countWords(string):
    """Return how many words are found by repeatedly scanning ``string``.

    After each word the scan restarts right after that word; it stops at the
    first remainder in which no word is found.
    """
    total = 0
    while string:
        span = _find_longest_word(string)
        if span is None:
            break
        total += 1
        string = string[span[1]:]
    return total


def countAl(string, left, right):
    """Per-word scorer for ``parseStringL``: every word counts as 1."""
    return 1


def sigProb(string):
    """Return the sum of ``4 ** (word_length - 1)`` over the words found.

    Words are located the same way as in ``countWords``; longer words
    therefore weigh exponentially more than short ones.
    """
    total = 0
    while string:
        span = _find_longest_word(string)
        if span is None:
            break
        start, end = span
        total += pow(4, end - start - 1)
        string = string[end:]
    return total


def sigProbAl(string, left, right):
    """Per-word scorer for ``parseStringL``: ``4 ** (right - left - 1)``."""
    return pow(4, right - left - 1)


def junkChars(string):
    """Return the number of characters that are not part of a found word.

    Characters skipped before each word are counted; once no further word is
    found, the whole remaining text is counted as well.
    """
    junk = 0
    while string:
        span = _find_longest_word(string)
        if span is None:
            return junk + len(string)
        start, end = span
        junk += start
        string = string[end:]
    return junk


def wordCharsAl(string, left, right):
    """Per-word scorer for ``parseStringL``: the word length."""
    return right - left


def parseStringL(string, algorithms):
    """Accumulate several per-word scores over ``string`` in a single pass.

    Args:
        string: Text to scan for words.
        algorithms: Sequence of callables ``f(string, left, right)`` that
            score the word ``string[left:right]``.

    Returns:
        ``None`` when ``algorithms`` is empty, ``False`` when ``string`` is
        empty or contains no word, otherwise a list with one accumulated
        value per algorithm.  The list is in REVERSE order of ``algorithms``
        (last algorithm first); that ordering is kept for compatibility with
        existing callers.
    """
    if not algorithms:
        return None
    if not string:
        return False

    span = _find_longest_word(string)
    if span is None:
        return False
    left, right = span

    # Score the remainder first, then this word (same order as before).
    subsequentvalues = parseStringL(string[right:], algorithms)
    returnvalues = [algorithm(string, left, right) for algorithm in algorithms]
    returnvalues.reverse()

    if subsequentvalues and returnvalues:
        for index, later in enumerate(subsequentvalues):
            returnvalues[index] += later
    return returnvalues


class Rankable:
    """A text paired with its score.

    Ordering compares scores, equality compares the wrapped text, and
    indexing/slicing is forwarded to the text.
    """

    def __init__(self, string, value):
        self.item = string
        self.score = value

    def __cmp__(self, other):
        # Same result as cmp(self.score, other.score), written without the
        # ``cmp`` builtin so it also works where that builtin is missing.
        return (self.score > other.score) - (self.score < other.score)

    def __lt__(self, other):
        # Lets list.sort() order instances where __cmp__ is not consulted.
        return self.score < other.score

    def __eq__(self, other):
        return self.item == other.item

    def __str__(self):
        return str(self.item)

    def __getitem__(self, y):
        return self.item[y]


# Score every candidate.  Appending and reversing once yields the same order
# as inserting each entry at the front, without the quadratic cost.
rankedtexts = []
length = len(plaintextlist)
for num, item in enumerate(plaintextlist, 1):
    progress = str(100 * num // length) + "% [" + str(num) + " of " + str(length) + "]"
    print(" ".join(("Ranking", item[0:6], progress)))
    rankedtexts.append(Rankable(item, sigProb(item)))
rankedtexts.reverse()

# NOTE: the entries are written in reverse input order; they are not sorted
# by score (sorting was disabled in the original script).
with open("orderedtexts.txt", 'w') as orderedtexts:
    for item in rankedtexts:
        print(" ".join((item[:15], "has a score of ", str(item.score))))
        orderedtexts.write(str(item) + "\n\n")