If it won't be simple, it simply won't be. [Hire me, source code] by Miki Tebeka, CEO, 353Solutions

Friday, November 16, 2007

Word Reduction

A little solution to http://ddj.com/cpp/202806370?pgno=3:
#!/usr/bin/env python

DICTIONRAY = set()

def load_dictionary(filename):
DICTIONRAY.add("a")
DICTIONRAY.add("i")
for line in open(filename):
DICTIONRAY.add(line.strip())

def _reduction(word):
if word not in DICTIONRAY:
return []
if len(word) == 1:
return [word]

for i in range(len(word)):
subword = "%s%s" % (word[:i], word[i+1:])
if subword not in DICTIONRAY:
continue
path = reduction(subword)
if path:
return [word] + path
return []

CACHE = {}
def reduction(word):
if word not in CACHE:
CACHE[word] = _reduction(word)

return CACHE[word]

def main(argv=None):
if argv is None:
import sys
argv = sys.argv

from os.path import isfile
from optparse import OptionParser

parser = OptionParser("usage: %prog DICTIONRAY")

opts, args = parser.parse_args(argv[1:])
if len(args) != 1:
parser.error("wrong number of arguments") # Will exit

dictfile = args[0]
if not isfile(dictfile):
raise SystemExit("error: can't find %s" % dictfile)

load_dictionary(dictfile)
for word in sorted(DICTIONRAY, key=lambda w: len(w), reverse=1):
path = reduction(word)
if path:
print "\n".join(path)
break

if __name__ == "__main__":
main()
Works fast enough as well (running on SIGNLE.TXT):
mtebeka@bugs:word-reduction - 08:43 $ time ./word_reduction.py dictionaries/SINGLE.TXT
restraint's
restraints
restrains
retrains
retains
retain
retin
rein
rin
in
n

real 0m4.088s
user 0m4.023s
sys 0m0.065s
mtebeka@bugs:word-reduction - 08:43 $

1 comment:

Yair said...

import sys
[dict_fn] = sys.argv[1:]
dict_words = [x.strip() for x in file(dict_fn)]
good_words = set(['', 'i', 'a'])
def sub_words(word):
--for i in range(len(word)):
----yield word[:i]+word[i+1:]
for word in sorted(dict_words, key=len):
--if any(w in good_words for w in sub_words(word)):
----good_words.add(word)
word = max(good_words, key=len)
while word:
--print word
--for word in sub_words(word):
----if word in good_words:
------break

Blog Archive