Entry
Calculate the Index of Coincidence for a string.
Dec 18th, 2009 15:26
new acct, John Lehmann,
# Reference index-of-coincidence values, keyed by language name.
# findLanguage() compares a text's computed IC against these values and
# returns the key of the closest one.
# NOTE: the key "Norweigian" is misspelled, but it is a runtime string that
# findLanguage() can return, so it is kept unchanged.
# "Random" is approximately 1/26.
LANGUAGES = {
    "Arabic": 0.075889,
    "Danish": 0.070731,
    "Dutch": 0.079805,
    "English": 0.066895,
    "Finnish": 0.073796,
    "French": 0.074604,
    "German": 0.076667,
    "Greek": 0.069165,
    "Hebrew": 0.076844,
    "Italian": 0.073294,
    "Japanese": 0.077236,
    "Malay": 0.085286,
    "Norweigian": 0.069428,
    "Portuguese": 0.074528,
    "Russian": 0.056074,
    "Serbo Croatian": 0.064363,
    "Spanish": 0.076613,
    "Swedish": 0.064489,
    "Random": 0.038461,
}
def calculateIC(s):
    """
    Calculate the index of coincidence of the letters in *s*.

    Only alphabetic characters are counted, case-insensitively; every other
    character (digits, whitespace, punctuation) is ignored.

                     F * (F - 1)
        IC = sum of  -----------
                     N * (N - 1)

    where F is the number of occurrences of each distinct letter and N is
    the total number of letters counted.

    Returns a float. When fewer than two letters are present the
    denominator N * (N - 1) is zero, so 0.0 is returned rather than
    raising ZeroDivisionError.
    """
    from collections import Counter

    # Lower-case each character first, then keep it only if it is alphabetic.
    lowered = (ch.lower() for ch in s)
    counts = Counter(ch for ch in lowered if ch.isalpha())
    total = sum(counts.values())
    if total < 2:
        return 0.0

    def rect(x):
        # x * (x - 1) as a float, so the final division is true division.
        return float(x) * (x - 1)

    return sum(rect(freq) for freq in counts.values()) / rect(total)
def findLanguage(s):
    """
    Guess the language of *s* from its index of coincidence.

    Computes calculateIC(s) and returns the key of LANGUAGES whose reference
    value has the smallest absolute difference from it. If several keys are
    equally close, the one that comes first when iterating LANGUAGES wins.
    """
    ic = calculateIC(s)
    # min() with a key picks the closest entry directly; the result of
    # dict.items() has no .sort() method on Python 3, so sorting it in place
    # is not portable.
    return min(LANGUAGES, key=lambda name: abs(ic - LANGUAGES[name]))