30 lines
780 B
Python
30 lines
780 B
Python
|
from collections import Counter
|
||
|
|
||
|
# Lowercase ASCII letters; only these characters are counted.
alphabet = "abcdefghijklmnopqrstuvwxyz"

# Letter counts of the sample file, lower-cased and restricted to `alphabet`.
# The file is read inside a `with` block so the handle is closed as soon as
# the contents have been consumed instead of being left to the garbage
# collector.
# NOTE(review): "sample.tx" looks like it may be a typo for "sample.txt" —
# confirm against the file actually shipped before changing the path.
with open("sample.tx") as _sample_file:
    reference = Counter(
        c for c in _sample_file.read().lower() if c in alphabet
    )
|
||
|
|
||
|
def get_statistics(text):
    """Count how often each letter of ``alphabet`` occurs in *text*.

    The text is lower-cased first; characters that are not in ``alphabet``
    are skipped.

    Returns:
        A ``Counter`` mapping each letter found to its number of occurrences.
    """
    letter_counts = Counter()
    for ch in text.lower():
        if ch in alphabet:
            letter_counts[ch] += 1
    return letter_counts
|
||
|
|
||
|
def relative_most_common(statistics):
    """Return the relative frequency of the most common item in *statistics*.

    Args:
        statistics: A ``Counter`` (or any mapping of item -> count).

    Returns:
        The largest count divided by the sum of all counts. ``0.0`` is
        returned when there is nothing to measure (empty mapping or a total
        of zero) instead of raising ``IndexError`` / ``ZeroDivisionError``.
    """
    counts = statistics.values()
    total_chrs = sum(counts)
    if not total_chrs:
        # No observations at all: there is no "most common" share to report.
        return 0.0
    # max() over the counts equals the count of most_common(1)[0] without
    # sorting the whole mapping.
    return max(counts) / total_chrs
|
||
|
|
||
|
|
||
|
def get_statistical_key(text):
    """Estimate a key for *text* from its most frequent letter.

    Returns:
        A ``(K, quality)`` tuple where

        * ``K`` is the absolute distance between the code points of ``"e"``
          and the most common letter of *text*;
        * ``quality`` is the ratio between the relative frequency of the
          most common letter in *text* and in ``reference``, inverted when
          it exceeds 1 so the result never exceeds 1.
    """
    letter_counts = get_statistics(text)

    text_peak = relative_most_common(letter_counts)
    reference_peak = relative_most_common(reference)

    ratio = text_peak / reference_peak
    if ratio > 1:
        # Fold ratios above 1 back below 1 by taking the reciprocal ratio.
        quality = reference_peak / text_peak
    else:
        quality = ratio

    top_letter = letter_counts.most_common(1)[0][0]
    # NOTE(review): abs() discards the direction of the offset, so letters
    # the same distance before and after "e" yield the same K — confirm
    # callers expect that.
    K = abs(ord("e") - ord(top_letter))

    return K, quality
|