PythonWise

Wednesday, April 30, 2008

XML RPC File Server

#!/usr/bin/env python
'''Simple file client/server using XML RPC'''

from SimpleXMLRPCServer import SimpleXMLRPCServer
from xmlrpclib import ServerProxy, Error as XMLRPCError
import socket

def get_file(filename):
    '''Return the whole content of `filename`, read in binary mode.

    The file is closed before returning, also when reading fails.
    '''
    stream = open(filename, "rb")
    try:
        content = stream.read()
    finally:
        stream.close()
    return content

def main(argv=None):
    '''Command line entry point: fetch a file (--get) or serve files.

    argv defaults to sys.argv. The only (optional) positional argument is
    [HOST:]PORT, which defaults to localhost:3030.

    * With --get FILENAME the file is requested from the XML RPC server at
      HOST:PORT and printed.
    * Otherwise an XML RPC server exposing get_file is started on
      localhost:PORT (HOST is used in client mode only).

    Leaves through SystemExit on bad arguments, a bad port, XML RPC errors
    and connection errors.
    '''
    if argv is None:
        import sys
        argv = sys.argv

    default_port = "3030"
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [[HOST:]PORT]")
    parser.add_option("--get", help="get file", dest="filename",
            action="store", default="")

    opts, args = parser.parse_args(argv[1:])
    if len(args) not in (0, 1):
        parser.error("wrong number of arguments") # Will exit

    if args:
        port = args[0]
    else:
        port = default_port

    if ":" in port:
        # Split on the last colon only: a plain split(":") dies with an
        # unpacking error (and a traceback) on input such as "a:b:3030"
        host, port = port.rsplit(":", 1)
    else:
        host = "localhost"

    try:
        port = int(port)
    except ValueError:
        raise SystemExit("error: bad port - %s" % port)

    if opts.filename:
        try:
            proxy = ServerProxy("http://%s:%s" % (host, port))
            print(proxy.get_file(opts.filename))
            raise SystemExit
        except XMLRPCError as e:
            # Only Fault has a faultString, the other xmlrpclib errors (such
            # as ProtocolError) don't - fall back to the error itself
            reason = getattr(e, "faultString", e)
            error = "error: can't get %s (%s)" % (opts.filename, reason)
            raise SystemExit(error)
        except socket.error as e:
            raise SystemExit("error: can't connect (%s)" % e)

    server = SimpleXMLRPCServer(("localhost", port))
    server.register_function(get_file)
    print("Serving files on port %d" % port)
    server.serve_forever()

if __name__ == "__main__":
    main()

This is a huge security hole, use at your own risk.

Friday, April 18, 2008

web-install


#!/bin/bash
# Do the `./configure && make && sudo make install` dance, given a download URL
#
# Downloads URL to the current directory, unpacks it (bzip2 when the name
# contains .tar.bz2, gzip otherwise) and installs from the unpacked directory
# using setup.py when there is one.

if [ $# -ne 1 ]; then
    echo "usage: `basename "$0"` URL"
    exit 1
fi

set -e  # Fail on errors

url=$1

# Expansions are quoted so that URLs with characters special to the shell
# (such as ? or &) reach wget and tar untouched
wget --no-check-certificate "$url"
archive=`basename "$url"`

# Glob match with literal dots (the old `grep .tar.bz2` treated every dot as
# "any character")
case "$archive" in
    *.tar.bz2*) tar -xjf "$archive";;
    *) tar -xzf "$archive";;
esac

# Directory name is the archive name up to the first ".tar"
cd "${archive/.tar*}"

if [ -f setup.py ]; then
    sudo python setup.py install
else
    ./configure && make && sudo make install
fi

cd ..

Tuesday, April 15, 2008

Some sites (such as Google) give you a "trampoline" URL so they can register what you have clicked on. I find it highly annoying, since you can't tell where you are going just by hovering over the link and you can't "copy link location" into a document.

The problem is that these people are just lazy:

<html>
   <body>
       <!-- The real target stays in href, so hovering and "copy link
            location" keep working; the click is reported on the side. -->
       <a href="http://pythonwise.blogspot.com"
           onclick="jump(this.href, 1);">Pythonwise</a> knows.
   </body>
   <script src="jquery.js"></script>
   <script>
       /* Report a click to jump.cgi.
        *
        * url   - address of the clicked link, as a string. The caller
        *         passes this.href: handing over the anchor element itself
        *         only works as long as it is implicitly converted to a
        *         string when the request data is serialized.
        * value - anything that identifies this specific click
        *
        * Always returns true.
        */
       function jump(url, value)
       {
           $.post("jump.cgi", {
               url: url,
               value: value
           });

           return true;
       }
   </script>
</html>

Notes:

Using jQuery
"value" can be anything you want to identify this specific click. I'd use a UUID and some table for registering who is the user, what is the url, the time ...

Wednesday, April 09, 2008

num_checkins


#!/bin/bash
# How many checkins did I do today?
# Without arguments will default to current directory

# ${1:+"$1"} expands to nothing when no path was given (svn then works on the
# current directory) and to the quoted path otherwise, so paths with spaces
# are passed as one argument.
svn log -r"{`date +%Y%m%d`}:HEAD" ${1:+"$1"} | grep "| $USER |" | wc -l

Thursday, April 03, 2008

FeedMe - A simple web-based RSS reader

A simple web-based RSS reader in less than 100 lines of code.

Using feedparser, jQuery and plain old CGI.

index.html


<html>
 <head>
     <title>FeedMe - A Minimal Web Based RSS Reader</title>
     <link rel="stylesheet" type="text/css" href="feedme.css" />
     <link rel="shortcut icon" href="feedme.ico" />
     <style>
         a {
             text-decoration: none;
         }
         a:hover {
             background-color: silver;
         }
         /* Entry summary, hidden until the mouse is over the entry title */
         div.summary {
             display: none;
             position: absolute;
             background: gray;
             width: 70%;
             font: 18px monospace;
             border: 1px solid black;
         }
     </style>
 </head>
 <body>
     <h2>FeedMe - A Minimal Web Based RSS Reader</h2>
     <div>
         Feed URL: <input type="text" size="80" id="feed_url"/>
         <button onclick="refresh_feed();">Load</button>
     </div>
     <hr />
     <!-- Filled with the HTML fragment produced by feed.cgi -->
     <div id="items">
     </div>
 </body>
 <script src="jquery.js"></script>
 <script>
     /* Pending refresh timer, if any */
     var refresh_timer = null;

     /* Load the feed whose URL is in the input box into the items div and
      * schedule the next refresh. Does nothing when the box is empty.
      */
     function refresh_feed() {
         var url = $.trim($("#feed_url").val());
         if ("" == url) {
             return;
         }

         $("#items").load("feed.cgi", {"url" : url});
         /* Update every minute. The pending timer is dropped first,
          * otherwise every click on "Load" would start one more
          * refresh loop.
          */
         if (refresh_timer !== null) {
             clearTimeout(refresh_timer);
         }
         refresh_timer = setTimeout(refresh_feed, 1000 * 60);
     }
 </script>
</html>

feed.cgi


#!/usr/bin/env python

import feedparser
from cgi import FieldStorage, escape
from time import ctime

# HTML fragment for a single feed entry, filled with the % operator from a
# mapping with the keys:
#   link    - target of the entry link
#   eid     - id of the summary div, shown/hidden by the mouse handlers
#   title   - text of the entry link
#   summary - content of the summary div
ENTRY_TEMPLATE = '''
<a href="%(link)s"
 onmouseover="$('#%(eid)s').show();"
 onmouseout="$('#%(eid)s').hide();"
 target="_new"
>
%(title)s
</a> <br />
<div class="summary" id="%(eid)s">
%(summary)s
</div>
'''

def main():
    '''CGI entry point: print the entries of a feed as an HTML fragment.

    The feed address is taken from the "url" form field; exits through
    SystemExit when it is missing. Every entry is rendered with
    ENTRY_TEMPLATE; an entry that can't be rendered is reported on stderr
    and skipped. The current time is printed after the entries.
    '''
    import sys

    print("Content-type: text/html\n")

    form = FieldStorage()
    url = form.getvalue("url", "")
    if not url:
        raise SystemExit("error: no url given")

    feed = feedparser.parse(url)
    for enum, entry in enumerate(feed.entries):
        try:
            values = {
                "eid": "entry%d" % enum,
                # Feed content is not ours: escape what lands inside an
                # attribute (quotes included) or in plain text
                "link": escape(entry.link, 1),
                "title": escape(entry.title),
                # The summary is markup and is passed through as is
                # NOTE(review): this trusts feedparser to hand out safe
                # HTML - confirm
                "summary": entry.summary,
            }
            print(ENTRY_TEMPLATE % values)
        except Exception as e:
            # Best effort: one bad entry must not kill the whole page, but
            # leave a trace (stderr usually ends up in the web server log)
            sys.stderr.write("feed.cgi: skipping entry %d: %r\n" % (enum, e))

    print("<br />%s" % ctime())

if __name__ == "__main__":
    main()

How it works:

The JavaScript script call loads the output of feed.cgi to the items div
feed.cgi reads the RSS feed from the given URL and output an HTML fragment
Hovering over a title will show the entry summary
setTimeout makes sure we refresh the view every minute

Wednesday, March 26, 2008

httpserve


#!/bin/bash
# Quickly serve files over HTTP
#
# Serves the directory PATH using Python's SimpleHTTPServer, on PORT when it
# is given.

# Miki Tebeka <miki.tebeka@gmail.com>

usage="usage: `basename "$0"` PATH [PORT]"

if [ $# -ne 1 ] && [ $# -ne 2 ]; then
    echo "$usage" >&2
    exit 1
fi

case $1 in
    "-h" | "-H" | "--help" ) echo "$usage"; exit;;
    * ) path=$1; port=$2;;
esac

# Quoted, otherwise a path with spaces breaks the test and the cd
if [ ! -d "$path" ]; then
    echo "error: $path is not a directory" >&2
    exit 1
fi

# Don't serve the wrong directory when we can't get into the right one
cd "$path" || exit 1
# $port stays unquoted on purpose: without a PORT argument it has to expand
# to nothing so the server picks its default port
python -m SimpleHTTPServer $port

Tuesday, March 18, 2008

unique


def unique(items):
    '''Drop repeated items from a sequence, keeping first occurrences in order

    Items must be hashable.

    >>> unique([1, 2, 3, 2, 1, 4, 2])
    [1, 2, 3, 4]
    >>> unique([2, 2, 2, 1, 1, 1])
    [2, 1]
    >>> unique([1, 2, 3, 4])
    [1, 2, 3, 4]
    >>> unique([])
    []
    '''
    already = set()
    remember = already.add

    def first_time(item):
        # 1 only the first time `item` shows up; remembering it here is what
        # makes every later occurrence return 0
        if item in already:
            return 0
        remember(item)
        return 1

    return filter(first_time, items)

Tuesday, March 04, 2008

ansiprint

Thursday, February 21, 2008

`extract-audio`

OK, not Python - but sometimes bash is a better tool.


#!/bin/bash
# Extract audio from video files
# Uses mplayer and lame
#
# mplayer dumps the audio track as PCM into a named pipe, lame reads the pipe
# and encodes it to OUTPUT_MP3.

# Miki Tebeka <miki.tebeka@gmail.com>

if [ $# -ne 2 ]; then
    echo "usage: `basename "$0"` INPUT_VIDEO OUTPUT_MP3"
    exit 1
fi

infile=$1
outfile=$2

if [ ! -f "$infile" ]; then
    echo "error: can't find $infile"
    exit 1
fi

if [ -f "$outfile" ]; then
    echo "error: $outfile exists"
    exit 1
fi

fifoname=/tmp/encode.$$
mkfifo "$fifoname" || exit 1
# Remove the pipe however we leave, also when mplayer or lame fails
trap 'rm -f "$fifoname"' EXIT

# "fast" and "file" are suboptions of the same pcm driver and go in a single
# -ao; when -ao is given twice the later one replaces the earlier one
mplayer -vc null -vo null -ao pcm:fast:file="$fifoname" "$infile" &
lame "$fifoname" "$outfile"

Wednesday, February 20, 2008

pfilter


#!/usr/bin/env python
'''Path filter, to be used in pipes to filter out paths.

* Unix test commands (such as -f can be specified as well)
* {} replaces file name

Examples:
    # List only files in current directory
    ls -a | pfilter -f

    # Find files not versioned in svn 
    # (why, oh why, does svn *always* return 0?)
    find . | pfilter 'test -n "`svn info {} 2>&1 | grep Not`"'
'''

__author__ = "Miki Tebeka <miki.tebeka@gmail.com>"

from os import system

def pfilter(path, command):
    '''Filter path according to command

    Runs `command` on `path` through the shell and returns True when it
    exits with 0.

    * "{}" in command is replaced by the (shell quoted) path, without "{}"
      the path is added at the end of the command
    * a command starting with "-" is handed to test(1)

    All output of the command is discarded.
    '''
    try:
        from shlex import quote
    except ImportError: # Python 2
        from pipes import quote

    # Paths come from the outside (stdin): quote them so that spaces and
    # shell meta characters in a file name are not interpreted by the shell.
    # It also turns an empty path into '', a bare "test -f" would succeed.
    path = quote(path)

    if "{}" in command:
        command = command.replace("{}", path)
    else:
        command = "%s %s" % (command, path)

    if command.startswith("-"):
        command = "test %s" %  command

    # FIXME: win32 support
    # Order matters: stdout goes to /dev/null first, then stderr follows it.
    # The other way around ("2>&1 > /dev/null") stderr ends up in our output.
    command += " > /dev/null 2>&1"

    return system(command) == 0

def main(argv=None):
    '''Print the paths read from stdin (one per line) that pass the command.

    argv defaults to sys.argv and must hold exactly one argument, the
    command. Otherwise usage and the module documentation are written to
    stderr and the program exits with status 1.
    '''
    import sys

    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        from os.path import basename
        sys.stderr.write("usage: %s COMMAND\n" % basename(argv[0]))
        sys.stderr.write("\n")
        sys.stderr.write("%s\n" % __doc__)
        raise SystemExit(1)

    command = argv[1]
    # Same pipeline as a filter over stripped lines, spelled out as a loop
    for line in sys.stdin:
        path = line.strip()
        if pfilter(path, command):
            print(path)

if __name__ == "__main__":
    main()

Tuesday, February 12, 2008

Opening File according to mime type

Most of the modern desktops already have a command line utility to open file according to their mime type (GNOME/gnome-open, OSX/open, Windows/start, XFCE/exo-open, KDE/kfmclient ...)

However, most (all?) of them rely on the file extension, whereas I needed something to view attachments from mutt, which passes the file data on stdin.

So, here we go (I call this attview):


#!/usr/bin/env python
'''View attachment with right application'''

__author__ = "Miki Tebeka <miki.tebeka@gmail.com>"

from os import popen, system
from os.path import isfile
import re

class ViewError(Exception):
    '''Raised when an attachment can't be viewed.'''

def view_attachment(data):
    '''Open `data` with the default desktop application for its mime type.

    The data is written to /tmp/attview, its mime type is guessed with
    file(1), the application is looked up in
    /usr/share/applications/defaults.list and the Exec command of its
    .desktop file is started in the background.

    Raises ViewError when the mime type can't be guessed, when no
    application is registered for it, or when the .desktop file is missing
    or has no Exec entry.
    '''
    # In the .desktop file, the file name is %f, %F, %u or %U
    placeholder_sub = re.compile("%[fFuU]").sub

    # NOTE(review): fixed, predictable name in a shared directory. Kept since
    # the viewer runs in the background and needs the file to stay around.
    FILENAME = "/tmp/attview"
    fo = open(FILENAME, "wb")
    try:
        fo.write(data)
    finally:
        fo.close()

    mime_type = popen("file -ib %s" % FILENAME).read().strip()
    if ";" in mime_type:
        mime_type = mime_type[:mime_type.find(";")]
    # No output at all means file(1) didn't run
    if not mime_type or mime_type == "application/x-not-regular-file":
        raise ViewError("can't guess mime type")

    APPS_DIR = "/usr/share/applications"
    # Match the whole key: a plain prefix test lets "text/x" pick up the
    # line of "text/x-python"
    prefix = mime_type + "="
    appfile = ""
    fo = open("%s/defaults.list" % APPS_DIR)
    try:
        for line in fo:
            if line.startswith(prefix):
                appfile = line.strip()[len(prefix):]
                break
    finally:
        fo.close()
    if not appfile:
        raise ViewError("can't find how to open %s" % mime_type)

    appfile = "%s/%s" % (APPS_DIR, appfile)
    if not isfile(appfile):
        raise ViewError("can't find %s" % appfile)

    cmd = None
    fo = open(appfile)
    try:
        for line in fo:
            # Cut at the first "=" only, the command itself may contain "="
            # (as in "Exec=env FOO=bar app %U")
            key, sep, value = line.strip().partition("=")
            if sep and key.strip() == "Exec":
                cmd = value.strip()
                break
    finally:
        fo.close()
    if cmd is None:
        raise ViewError("can't find Exec in %s" % appfile)

    fullcmd = placeholder_sub(FILENAME, cmd)
    if fullcmd == cmd:
        # No placeholder in the command, pass the file as last argument
        fullcmd += " %s" % FILENAME
    system(fullcmd + "&")


def main(argv=None):
    '''Command line entry point: view the attachment in FILENAME or stdin.

    argv defaults to sys.argv. Without a FILENAME argument, or when it is
    "-", the data is read from stdin. Read errors and ViewError end the
    program through SystemExit with an error message.
    '''
    import sys
    if argv is None:
        argv = sys.argv

    from optparse import OptionParser

    parser = OptionParser("usage: %prog [FILENAME]")

    opts, args = parser.parse_args(argv[1:])
    if len(args) not in (0, 1):
        parser.error("wrong number of arguments") # Will exit

    filename = args[0] if args else "-"

    if filename == "-":
        data = sys.stdin.read()
    else:
        try:
            fo = open(filename, "rb")
            try:
                data = fo.read()
            finally:
                # Don't leave closing the file to the garbage collector
                fo.close()
        except IOError as e:
            raise SystemExit("error: %s" % e.strerror)

    try:
        view_attachment(data)
    except ViewError as e:
        raise SystemExit("error: %s" % e)

if __name__ == "__main__":
    main()

Thursday, February 07, 2008

Playing with bits


def mask(size):
    '''Mask for `size' bits (a number with the `size' lowest bits set)

    >>> mask(3)
    7
    >>> mask(0)
    0
    '''
    # Plain int literal: Python switches to long by itself when needed, while
    # 1L forces a long result whose repr ("7L") breaks the doctest
    return (1 << size) - 1

def num2bits(num, width=32):
    '''String representation (in bits) of a number

    The result is `width' characters long, most significant bit first. Bits
    above `width' are dropped.

    >>> num2bits(3, 5)
    '00011'
    >>> num2bits(5, 5)
    '00101'
    '''
    # Collect the digits and join once instead of growing a string
    digits = []
    for bit in range(width - 1, -1, -1):
        if num & (1 << bit):
            digits.append("1")
        else:
            digits.append("0")
    return "".join(digits)

def get_bit(value, bit):
    '''Value (0 or 1) of bit number `bit' in `value', bit 0 is the lowest

    >>> num2bits(5, 5)
    '00101'
    >>> get_bit(5, 0)
    1
    >>> get_bit(5, 1)
    0
    '''
    # Move the wanted bit to position 0, then drop everything above it
    shifted = value >> bit
    return shifted & 1

def get_range(value, start, end):
    '''Bits `start' to `end' (both included) of `value', as a number

    >>> num2bits(5, 5)
    '00101'
    >>> get_range(5, 0, 1)
    1
    >>> get_range(5, 1, 2)
    2
    '''
    width = end - start + 1
    # Bring bit `start' down to position 0, then keep `width' bits
    shifted = value >> start
    return shifted & mask(width)

def set_bit(num, bit, value):
    '''Set bit `bit' in num to `value'

    Returns the new number. Any true `value' sets the bit, any false one
    clears it.

    >>> i = 5
    >>> set_bit(i, 1, 1)
    7
    >>> set_bit(i, 0, 0)
    4
    '''
    # Plain int literal (not 1L): a forced long result prints as "7L" and
    # breaks the doctest, Python switches to long by itself when needed
    flag = 1 << bit
    if value:
        return num | flag
    else:
        return num & ~flag

def sign_extend(num, size):
    '''Sign extend a number that is `size' bits wide

    The `size' lowest bits of num are read as a 2's complement number, bits
    above them are ignored.

    >>> sign_extend(5, 2)
    1
    >>> sign_extend(5, 3)
    -3
    '''
    # Plain int arithmetic (no 1L): a forced long result prints as "1L" and
    # breaks the doctest
    sign_bit = 1 << (size - 1)
    res = num & (sign_bit - 1)
    # Positive
    if (num & sign_bit) == 0:
        return res

    # Negative, 2's complement: the sign bit weighs -2^(size - 1)
    return res - sign_bit

Wednesday, February 06, 2008

rotate and stretch


from operator import itemgetter
from itertools import imap, chain, repeat

def rotate(matrix):
    '''Flip matrix over its diagonal: row N of the result is column N of matrix

    NOTE: despite the name this is a transposition (no row or column is
    reversed), not a real 90 degrees rotation. Kept as is for the callers.

    matrix is a sequence of rows, the result is a list of lists with as many
    rows as the first row of matrix has items. An empty matrix gives [].
    '''
    if not matrix:
        return []

    return [[row[col] for row in matrix] for col in range(len(matrix[0]))]

def stretch(items, times):
    '''stretch([1,2], 3) -> [1,1,1,2,2,2]

    Returns a new list with every item repeated `times` times, in order.
    '''
    # Built in place: the reduce(add, ...) version needed operator.add, which
    # was never imported, and copied the growing list on every step
    stretched = []
    for item in items:
        stretched.extend([item] * times)
    return stretched

def istretch(items, count):
    '''istretch([1,2], 3) -> [1,1,1,2,2,2] (generator)

    Lazy in `items` as well: an item is taken only when its copies are
    needed, so endless iterables work too (unpacking into chain(*...) read
    all of `items` up front).
    '''
    for item in items:
        for copy in repeat(item, count):
            yield copy

Friday, February 01, 2008

num2eng

Just found this on the web ...

svnfind


#!/usr/bin/env python
# Find paths matching directories in subversion repository

__author__ = "Miki Tebeka <miki.tebeka@gmail.com>"

# TODO:
# * Limit search depth
# * Add option to case [in]sensitive
# * Handling of svn errors
# * Support more of "find" predicates (-type, -and, -mtime ...)
# * Another porject: Pre index (using swish-e ...) and update only from
#   changelog

from os import popen

def join(path1, path2):
    '''Glue path2 to path1 with exactly one "/" added when path1 lacks it.'''
    if path1.endswith("/"):
        return "%s%s" % (path1, path2)
    return "%s/%s" % (path1, path2)

def svn_walk(root):
    '''Yield every path below `root`, recursively, using "svn ls".

    Paths are `root` joined with what "svn ls" prints, directories end with
    "/" and are followed right after being yielded.
    '''
    try:
        from shlex import quote
    except ImportError: # Python 2
        from pipes import quote

    # Proper shell quoting: wrapping in '...' by hand breaks as soon as a
    # path holds a single quote
    command = "svn ls %s" % quote(root)
    for line in popen(command):
        name = line.strip()
        if not name:
            # A blank line would give back root itself and recurse for ever
            continue
        path = join(root, name)
        yield path
        if path.endswith("/"): # A directory
            for subpath in svn_walk(path):
                yield subpath

def main(argv=None):
    '''Command line entry point: print the paths below PATH matching EXPR.

    argv defaults to sys.argv. EXPR is a regular expression, searched case
    insensitive in every path found by svn_walk. Exits through SystemExit
    when EXPR is not a valid expression or when nothing matched.
    '''
    if argv is None:
        import sys
        argv = sys.argv

    import re
    from optparse import OptionParser

    parser = OptionParser("usage: %prog PATH EXPR")

    opts, args = parser.parse_args(argv[1:])
    if len(args) != 2:
        parser.error("wrong number of arguments") # Will exit

    path, expr = args
    try:
        pred = re.compile(expr, re.I).search
    except re.error:
        raise SystemExit("error: bad search expression: %s" % expr)

    found = 0
    # Own name for the matches, the loop used to overwrite `path`
    for match in svn_walk(path):
        if not pred(match):
            continue
        found = 1
        print(match)

    if not found:
        # SystemExit, not SystemError: the latter is for interpreter
        # internal errors and ends in a traceback
        raise SystemExit("error: nothing matched %s" % expr)

if __name__ == "__main__":
    main()

Friday, January 18, 2008

Simple Text Summarizer

Comments:

About 50 lines of code
Gives reasonable results (try it out)
tokenize need to be improved much more (better detection, stop words ...)
split_to_sentences need to be improved much more (handle 3.2, Mr. Smith ...)
In real life you'll need to "clean" the text (Ads, credits, ...)

Tuesday, January 15, 2008

attrgetter is fast

#!/usr/bin/env python

from operator import attrgetter
from random import shuffle

class Point:
    '''Plain holder for the x and y attributes.'''
    def __init__(self, x, y):
        self.x = x
        self.y = y

def sort1(points):
    '''Sort points in place by their x attribute, the key is a lambda.'''
    points.sort(key = lambda p: p.x)

def sort2(points):
    '''Sort points in place by their x attribute, the key is attrgetter.'''
    points.sort(key = attrgetter("x"))

# Time both sort functions, each on its own list holding the same points
if __name__ == "__main__":
    from timeit import Timer

    # NOTE(review): the points are created ordered by x and never shuffled,
    # so every run sorts an already sorted list
    points1 = [Point(x, 2 * x) for x in range(100)]
    points2 = points1[:]

    num_times = 10000

    # The setup statement imports the function and its list from this script
    t1 = Timer("sort1(points1)", "from __main__ import sort1, points1")
    print t1.timeit(num_times)

    t2 = Timer("sort2(points2)", "from __main__ import sort2, points2")
    print t2.timeit(num_times)

$ ./attr.py
0.492087125778
0.29891705513
$

Friday, January 04, 2008

Faster and Shorter "dot" using itertools

Let's calculate the dot product of two vectors:


from itertools import starmap, izip
from operator import mul

def dot1(v1, v2):
  '''Dot product of v1 and v2, with an explicit loop.

  v2 is indexed with the positions of v1, so a v2 shorter than v1 raises
  IndexError.
  '''
  result = 0
  for i, value in enumerate(v1):
      result += value * v2[i]
  return result

def dot2(v1, v2):
  '''Dot product of v1 and v2, using itertools.

  izip stops at the end of the shorter vector.
  '''
  return sum(starmap(mul, izip(v1, v2)))

# Time both implementations on the same two vectors
if __name__ == "__main__":
  from timeit import Timer

  num_times = 1000
  v1 = range(100)
  v2 = range(100)

  # The vectors are written into the timed statement as list literals, so
  # building the two lists is part of what is measured (same for both)
  t1 = Timer("dot1(%s, %s)" % (v1, v2), "from __main__ import dot1")
  print t1.timeit(num_times) # 0.038722038269

  t2 = Timer("dot2(%s, %s)" % (v1, v2), "from __main__ import dot2")
  print t2.timeit(num_times) # 0.0260770320892

dot2 is faster and shorter, however dot1 is more readable - my vote goes to dot2.

Friday, November 30, 2007

Going Scheme Style

Let's count the words in our code, omitting comments:

#!/usr/bin/env python

from functools import partial
import re

# Remove everything from the first "#" up to the end of a single line
filter_comment = partial(re.compile("#.*$").sub, "")
# List of the words (runs of non whitespace characters) in a string
words = re.compile("[^ \t\n\r]+").findall

def num_words(text):
    '''Count the words in a piece of code, comments are not counted

    A comment is whatever follows the first "#" in a line.

    >>> num_words("")
    0
    >>> num_words("1 + 1")
    3
    >>> num_words("1 + 1 # add 1 to 1")
    3
    '''
    total = 0
    for line in text.splitlines():
        total += len(words(filter_comment(line)))
    return total


if __name__ == "__main__":
    import doctest
    doctest.testmod()

Friday, November 16, 2007

Word Reduction

A little solution to http://ddj.com/cpp/202806370?pgno=3:

#!/usr/bin/env python

# All the known words, filled by load_dictionary
DICTIONRAY = set()

def load_dictionary(filename):
    '''Add the words of `filename` (one word per line) to DICTIONRAY.

    Lines are stripped and blank lines are skipped. The one letter words "a"
    and "i" are always added.
    '''
    DICTIONRAY.add("a")
    DICTIONRAY.add("i")
    fo = open(filename)
    try:
        for line in fo:
            word = line.strip()
            # A blank line is not a word
            if word:
                DICTIONRAY.add(word)
    finally:
        # Don't leave closing the file to the garbage collector
        fo.close()

def _reduction(word):
    '''Find a way to reduce word to one letter, dropping a letter at a time.

    Every step of the way must be in DICTIONRAY. Returns the list of words
    from `word` down to the one letter word, or [] when there is no way.
    Shorter words are solved through reduction, which remembers the answers.
    '''
    if word not in DICTIONRAY:
        return []
    if len(word) == 1:
        return [word]

    for pos in range(len(word)):
        # The word without the letter at `pos`
        shorter = "%s%s" % (word[:pos], word[pos+1:])
        if shorter in DICTIONRAY:
            tail = reduction(shorter)
            if tail:
                return [word] + tail
    return []

# word -> answer of _reduction(word), so that no word is solved twice
CACHE = {}
def reduction(word):
    '''Same as _reduction, with the answers remembered in CACHE.'''
    try:
        return CACHE[word]
    except KeyError:
        CACHE[word] = _reduction(word)
        return CACHE[word]

def main(argv=None):
    '''Command line entry point: print the reduction of the longest word.

    argv defaults to sys.argv and must hold one argument, the dictionary
    file. Words are tried from the longest to the shortest; the steps of
    the first one that can be reduced are printed, one per line.
    '''
    if argv is None:
        import sys
        argv = sys.argv

    from os.path import isfile
    from optparse import OptionParser

    parser = OptionParser("usage: %prog DICTIONRAY")

    opts, args = parser.parse_args(argv[1:])
    if len(args) != 1:
        parser.error("wrong number of arguments") # Will exit

    dictfile = args[0]
    if not isfile(dictfile):
        raise SystemExit("error: can't find %s" % dictfile)

    load_dictionary(dictfile)
    longest_first = sorted(DICTIONRAY, key=len, reverse=True)
    for word in longest_first:
        steps = reduction(word)
        if not steps:
            continue
        print("\n".join(steps))
        break

if __name__ == "__main__":
    main()

Works fast enough as well (running on SINGLE.TXT):

mtebeka@bugs:word-reduction - 08:43 $ time ./word_reduction.py dictionaries/SINGLE.TXT
restraint's
restraints
restrains
retrains
retains
retain
retin
rein
rin
in
n

real    0m4.088s
user    0m4.023s
sys     0m0.065s
mtebeka@bugs:word-reduction - 08:43 $

Wednesday, October 31, 2007

Quick Web Searches

A little script to search the documents in Journal of Machine Learning.


import webbrowser
from urllib import urlencode

def jmlr(words):
    '''Open the browser on a Google search for `words` in the JMLR PDFs.

    words is a sequence of strings, they are joined with spaces and added to
    the site/filetype restrictions of the query.
    '''
    terms = " ".join(words)
    query = "site:http://jmlr.csail.mit.edu filetype:pdf " + terms
    params = urlencode([("q", query)])

    webbrowser.open("http://www.google.com/search?" + params)

# Script entry: every command line argument is a search word
if __name__ == "__main__":
    from optparse import OptionParser

    parser = OptionParser("usage: %prog WORD1 [WORD2 ...]")

    opts, search_words = parser.parse_args()
    if len(search_words) == 0:
        parser.error("wrong number of arguments") # Will exit

    jmlr(search_words)

I have many more of these for Google search, Wikipedia search, Acronym Search ...
The trick is to do a search using the web interface and then look at the URL of the results page.

Tuesday, October 23, 2007

Minimal testing

I'm using py.test as our test suite. And found out that even the most minimal tests give me great benefits:

def test_joke():
 '''Empty on purpose: passes as long as the module can be imported.'''
 return None

Once I have that in a file called test_joke.py,
py.test will pick it up and try to run it.

The good thing to have only this minimal code is that the test will fail if you happen to introduce a syntax error or some error in the module initialization.

Of course, when I have more time I beef up the tests ;)

What make it even more useful is a continuous integration system that run the tests every time someone checks in the code.

Wednesday, October 10, 2007

Persistent ID generator

Let's say you want to map key->number, and every time you get a new key you give it a different number.
(Useful for mapping words to vector index in IR)

The easy way is:

from collections import defaultdict
from itertools import count

# The default factory is the `next` method of a counter starting at 0: it is
# called once for every key seen for the first time, so every new key gets
# the next number and keeps it afterwards
vector_index = defaultdict(count(0).next)
print vector_index["a"] # 0
print vector_index["a"] # 0
print vector_index["b"] # 1

Monday, September 24, 2007

Python Load Time Is Slow

I'm very fond of CGI, the main reason is that if a CGI crashes - it's only this process for this user. Not the whole system.

However, Python is *slow* starting up - a problem in CGI world. I still use it but wish it'll load faster.

Here are some numbers for comparison (all programs just print "Hello there" and exit):


hw          real: 0.001 user: 0.001 sys: 0.000 
hwcc        real: 0.002 user: 0.001 sys: 0.001 
hw.lua      real: 0.004 user: 0.001 sys: 0.004 
hw.pl       real: 0.004 user: 0.002 sys: 0.001 
hw.rb       real: 0.006 user: 0.004 sys: 0.002 
hw.php      real: 0.017 user: 0.013 sys: 0.005 
hw.py       real: 0.019 user: 0.011 sys: 0.008 
hw.lsp      real: 0.023 user: 0.011 sys: 0.011 
hw.scm      real: 0.027 user: 0.019 sys: 0.008 
hwcs.exe    real: 0.036 user: 0.029 sys: 0.007 
HT.class    real: 0.084 user: 0.027 sys: 0.01

See the makefile below for which is each program.
As you can see Python is somewhere in the middle, not as bad as Java and C#, but about 5 times slower than Perl.

Makefile:

SCRIPTS = hw.py hw.rb hw.pl hw.lsp hw.scm hw.lua hw.php
GENERATED = hw hwcc hwcs.exe
JAVA = HT.class
PROGRAMS = $(SCRIPTS) $(GENERATED) $(JAVA)

# NOTE(review): no rule for `times` or hwcs.exe shows up in this listing,
# it looks cut short - confirm against the full makefile

# Recipe lines must start with a TAB, make rejects them otherwise
all: $(PROGRAMS) times
	@echo DONE

hw: hw.c
	gcc -o $@ -O3 $<

hwcc: hw.cc
	g++ -o $@ -O3 $<

HT.class: hw.java
	javac $<

and timeit is:


#!/bin/bash
# Print the run time of every program given on the command line, one line
# per program: the name, padded to MAXLEN characters, then the times.
# *.exe runs under mono, *.class under java, everything else directly.

TIMEFORMAT='real: %3R user: %3U sys: %3S'
SPACES="                               "
MAXLEN=10


# "$@" (not $*) keeps every argument in one piece
for program in "$@";
do
    name=${program/.class/}
    if [ "${program/.exe/}" != "$program" ]; then
        timestr=`(time mono "$program" > /dev/null) 2>&1`
    elif [ "$name" == "$program" ]; then
        timestr=`(time "./$program" > /dev/null) 2>&1`
    else
        timestr=`(time java -client "$name" > /dev/null) 2>&1`
    fi
    padlen=$(($MAXLEN - ${#program}))
    # No padding for names longer than MAXLEN, a negative length is not a
    # valid substring length
    if [ $padlen -lt 0 ]; then
        padlen=0
    fi
    echo "${program}${SPACES:0:$padlen}${timestr}"
done

Wednesday, August 29, 2007

Simple launcher for XFCE

I'm using xfce4 as a window manager, it has a nice launcher called xfrun4, however it only tries to execute applications.

I've written a little launcher that uses exo-open to open its argument. This way I can open PDF files, directories etc.
(It's not as fancy as Mac's QuickSilver or Windows SlickRun, but it does the job)

In order to get quick access, open the keyboard settings, create a new theme (you can't change the default) and add the launcher there. I usually go with "CTRL-SHIFT-K" to activate.


#!/usr/bin/env python
'''Simple lanucher'''

from Tkinter import Tk, Label, Entry
from tkFont import Font
from tkMessageBox import showerror

from os.path import exists, expanduser
from os import environ, popen

def launch(name):
    '''Open `name` with exo-open.

    "~" is expanded first. A name that does not exist as a path is looked up
    with which(1); ValueError is raised when that finds nothing.
    '''
    target = expanduser(name)
    if not exists(target):
        located = popen("which %s 2>/dev/null" % target).read().strip()
        if not located:
            raise ValueError("can't find %s" % target)
        target = located

    # Reading the output waits for exo-open to finish
    popen("/usr/bin/exo-open \"%s\"" % target).read()


# Set to 1 by quit (the Escape handler), reset by show_ui
USER_CANCEL = 0
# The Tk root window, created by build_ui
ROOT = None
# The text entry holding what to launch, created by build_ui
COMMAND = None

def quit(event):
   '''Escape handler: flag that the user gave up and leave the main loop.'''
   global USER_CANCEL

   USER_CANCEL = 1
   ROOT.quit()

def build_ui():
   '''Create the window: a single text entry, stored in ROOT and COMMAND.

   Escape calls quit, Return just leaves the main loop.
   '''
   global ROOT, COMMAND

   ROOT = Tk()
   ROOT.title("Launchie")
   ROOT.bind("<Escape>", quit)
   COMMAND = Entry(width=80, font=Font(size=14))
   COMMAND.pack()
   COMMAND.bind("<Return>", lambda e: ROOT.quit())

def show_ui():
   '''Run the main loop until Return or Escape, return the stripped entry text.

   USER_CANCEL is cleared first; check it afterwards to tell Escape from
   Return.
   '''
   global USER_CANCEL

   USER_CANCEL = 0
   COMMAND.focus()
   ROOT.mainloop()

   return COMMAND.get().strip()


def main(argv=None):
    '''Command line entry point: ask what to launch until it works.

    argv defaults to sys.argv, no arguments are accepted. The window is
    shown again after an empty entry or a failed launch; Escape exits.
    '''
    if argv is None:
        import sys
        argv = sys.argv

    from optparse import OptionParser

    parser = OptionParser("usage: %prog")

    opts, args = parser.parse_args(argv[1:])
    if len(args) != 0:
        parser.error("wrong number of arguments") # Will exit


    build_ui()

    while 1:
        try:
            command = show_ui()
            if USER_CANCEL:
                raise SystemExit

            if not command:
                showerror("Launchie Error", "Please enter *something*")
                continue

            launch(command)
            break
        except ValueError:
            # Title used to read "Lanuchie Error", unlike the dialog above
            showerror("Launchie Error", "Can't launch %s" % command)

if __name__ == "__main__":
    main()

The reason I chose Tkinter is that it's already installed with Python and it's good enough for simple stuff like this.

Tuesday, August 21, 2007

start

Many times, I open files from the command line.
However, each OS has its own utility for opening files, so I have this little script called start (yes, I started my life on Windows ;).


#!/bin/bash
# Open a file from command line, multi OS
#
# Picks the opener of the current platform (by uname) and runs it on PATH.

# Miki Tebeka <miki.tebeka@gmail.com>

if [ $# -ne 1 ]; then
   echo "usage: `basename "$0"` PATH" 1>&2
   exit 1
fi

# Quoted, otherwise the test breaks on paths with spaces
if [ ! -e "$1" ]; then
   echo "error: can't find $1" 1>&2
   exit 1
fi

case `uname` in
   Linux) open=exo-open;;
   Darwin) open=open;;
   CYGWIN*) open=cygstart;;
   MINGW32*) open=start;;
   *) echo "error: no start program for `uname` platform" 1>&2; exit 1;;
esac

$open "$1"

Friday, August 10, 2007

Supporting Search Query Syntax

It's very easy to add support for search-engine like syntax in your program.
The idea is to convert the query to a Python expression and then evaluate it.

We'll support the following syntax:

word1 word2       - word1 and word2
word1 AND word2   - word1 and word2
word1 word2       - word1 and word2
word1 OR word2    - word1 or word2
NOT word          - Not containing word

The code is very simple:

def is_operator(token):
    '''True for the query tokens that are not search words.

    These are "and", "not", "or" and the two parentheses, in lower case.
    '''
    return token in ("and", "not", "or", "(", ")")

def should_insert_and(expr, token):
    '''Tell if an implicit "and" goes between the expression and `token`.

    expr is the list of what was added so far (operators as strings, words
    as booleans), token is the next, lower case, query token. Returns 1 when
    an "and" must be added before the token, 0 otherwise.

    An "and" is needed when something complete (a word or a closing
    parenthesis) is followed by the start of something new (a word, "not" or
    an opening parenthesis), as in "a b", "a not b", "a ( b or c )" and
    "( a or b ) c". Only the first case used to be handled, the others ended
    in an expression that could not be evaluated.
    '''
    if not expr:
        return 0

    # After a binary operator, "not" or "(" the expression is still open
    if expr[-1] in ("and", "or", "not", "("):
        return 0

    # A binary operator or ")" carries on with what is already there
    if token in ("and", "or", ")"):
        return 0

    return 1

def match(query, text):
    '''Tell if `text` matches the search `query`.

    Both are lower cased and split on whitespace. Every query word becomes
    True or False (is it one of the words of the text?), the operators stay,
    an "and" is added where should_insert_and asks for one and the resulting
    Python expression is evaluated.
    '''
    vocabulary = set(text.lower().split())

    parts = []
    for token in query.lower().split():
        if should_insert_and(parts, token):
            parts.append("and")

        if not is_operator(token):
            # A search word, replaced by its truth value
            token = token in vocabulary
        parts.append(token)

    # Only operators and True/False get here, never raw query words
    return eval(" ".join([str(part) for part in parts]))

def test():
    '''Check match on a few (query, text) pairs.'''
    # (query, text, should it match?, message when the check fails)
    cases = [
        ("a", "a", 1, "a --- a"),
        ("a", "", 0, " --- a"),
        ("a AND b", "a c b", 1, "a c b --- a AND b"),
        ("a AND b", "a c", 0, "a c --- a AND b"),
        ("NOT ( a OR b )", "z", 1, "z --- NOT ( a OR b )"),
        ("a OR b", "b", 1, "b --- a OR b"),
    ]
    for query, text, expected, message in cases:
        if expected:
            assert match(query, text), message
        else:
            assert not match(query, text), message

Notes:
1. We don't do any fancy tokenization (text and query), but in most cases this should be enough.
2. We place an AND where it's missing.

Thursday, August 02, 2007

Cheetah Templates

Currently doing some web development and found Cheetah very useful.
I like Cheetah since its syntax is very similar to Python and I can use my existing Python objects with it.

I have one master template that set the site general site look and feel (with the master CSS of course).

## Master template: the page skeleton (style sheet, header, footer).
## NAME, head and body below are defaults that are meant to be replaced.
#from time import ctime

## Shown in the page header
#attr NAME = "???"

## Extra content for the <head> element, empty here
#def head
#end def

## The page content
#def body
OOOPS, head will roll...
#end def

<html>
<head>
<link rel="stylesheet" type="text/css" href="style.css" />
$head
</head>
<body>
<div class="header">My Wonderful Site - $NAME</div>
$body

<hr />
<div class="footer">
Generated $ctime()
</div>
</body>
</html>

$head and $body are place holders that the specific pages will fill.
Pages also define $NAME which will be shown in the header.

Then a specific page (index.tmpl) can be:


## Index page: brings in master.tmpl, then sets its own NAME and body.
#include "master.tmpl"

#attr NAME = "INDEX"

## $random is not defined in this template, it has to be found in the
## search list the template is created with
#def body
This is my site index page, see also <a href="other.cgi">other page</a>. <br />

Oh, and also random = $random;
#end def

And the CGI script:


#!/usr/local/bin/python

from Cheetah.Template import Template

from random import randint


def main():
   '''CGI entry point: print the HTTP header and the rendered index page.'''
   # Looks unused, but the template finds it by this very name in locals()
   random = randint(0, 100)

   # The header, then the empty line that ends the headers
   print "Content-Type: text/html"
   print

   page = Template(file="index.tmpl", searchList=[locals()])
   print page.respond()

if __name__ == "__main__":
   main()

Note that I pass locals() as the search list. This frees me from creating a mapping dictionary (exporting random to the template).

That's about it, you can use the master template and the site main CSS to have a uniform looking site and let each page implement just the $body and $head if it needs to.

Wednesday, July 18, 2007

Mini Excel

Not as small as the calculator, but small enough (120 lines of code).
It can do any Python expression as a formula, support cell reference and ranges.

Monday, July 16, 2007

Watch that "yield"

Assume you have a program:

def f():
   # The yield below makes f a generator function: calling f() only creates
   # the generator, the body (and the print) runs when it is iterated
   print "F"
   yield 1

if __name__ == "__main__":
   # The generator is thrown away without being iterated, nothing is printed
   f()

and you run it, nothing is printed.
Then you comment out the "yield" statement, and F is printed out.

I actually went and filed a bug report. I should have known better than that :)

Raymond pointed out that in the first case, the yield statement causes the function to become a generator and nothing is being run until next is called.

This means Python is behaving as expected, however I'd wish for Python or Pychecker to have a GCC like statement with no effect warning.

Thursday, June 14, 2007

`timeit`

Sometimes speed is important. The timeit module lets you find out how fast you are.

from timeit import Timer

def fast_fib(n):
    '''Fibonacci number at index n (1, 1, 2, 3, 5 ...), found by iteration.

    Anything below 2 gives 1.
    '''
    if n < 2:
        return 1

    previous, current = 1, 1
    for _ in range(n - 1):
        previous, current = current, previous + current

    return current

def slow_fib(n):
    '''Fibonacci number at index n (1, 1, 2, 3, 5 ...), by plain recursion.

    Anything below 2 gives 1.
    '''
    return 1 if n < 2 else slow_fib(n - 1) + slow_fib(n - 2)


# Which Fibonacci number to compute and how many times to compute it
INDEX = 20
TIMES = 100

# The setup statement imports what the timed statement needs from this script
fast_timer = Timer("fast_fib(INDEX)", "from __main__ import fast_fib, INDEX")
slow_timer = Timer("slow_fib(INDEX)", "from __main__ import slow_fib, INDEX")

# timeit returns the time of all the runs, print the average of one run
print "slow:", slow_timer.timeit(TIMES) / TIMES
print "fast:", fast_timer.timeit(TIMES) / TIMES

On my machine this gives:


fast: 5.11884689331e-06
slow: 0.00996325016022

However, sometimes you need to send your function some more complex data, plus you don't want to add too much timing code to the top level of your module.
One way to do it is:

def benchmark():
    '''Print the average time of one fast_fib call, over 100 calls.

    The argument is stored as an attribute of this function, which is how
    the timed statement gets to it (it imports benchmark from __main__).
    '''
    num_runs = 100
    benchmark.index = 20
    timer = Timer("fast_fib(benchmark.index)",
                  "from __main__ import fast_fib, benchmark")

    total = timer.timeit(num_runs)
    print(total / num_runs)

def main(argv=None):
  """Command line entry point: print fast_fib(NUMBER) or run the benchmark.

  argv defaults to sys.argv. Leaves through SystemExit after --benchmark,
  on a wrong number of arguments and when NUMBER is not an integer.
  """
  if argv is None:
      import sys
      argv = sys.argv

  from optparse import OptionParser

  parser = OptionParser("usage: %prog [options] NUMBER")
  parser.add_option("--benchmark", help="run benchmark",
          dest="benchmark", action="store_true", default=False)

  opts, args = parser.parse_args(argv[1:])

  if opts.benchmark:
      benchmark()
      raise SystemExit()

  if len(args) != 1:
      parser.error("wrong number of arguments") # Will exit

  # Only the conversion is guarded, so a ValueError raised for any other
  # reason is not misreported as a bad number.
  try:
      number = int(args[0])
  except ValueError:
      raise SystemExit("error: %s - bad number" % args[0])

  # Do main program stuff here
  print(fast_fib(number))

if __name__ == "__main__":
  main()

Wednesday, June 06, 2007

reStructuredText

I like to write documentation in a textual format. This way it's easy to view the changes in a regular diff tool and you can use almost any text editor to view the source.

The downside is that you usually need to compile the documentation - no WYSIWYG.

I used to like LaTeX. It produces excellent results out-of-the-box. And the math support is second to none.

However, reStructuredText has two advantages:
1. You can read the source directly and understand it
2. It produces good HTML (Yes, I know about latex2html)

You can also produce good PDFs using rst2latex and then pdflatex.

A short document example:

===========
Hello World
===========
:Author: Miki Tebeka <miki@mikitebeka.com>
:Date: $Date: 2007-06-05 21:02:04 -0700 (Tue, 05 Jun 2007) $

.. contents::

Chapter 1
=========
In the beginning ...

Sub Chapter 1
-------------
There was LaTex_ [#]_

Chapter 2
=========
And then there was reST_

::
  
   That can have preformatted
   text


----

.. _LaTex: http://tug.org
.. _reST: http://docutils.sf.net/rst.html

.. [#] Which is still very good

.. comment: vim: ft=rst spell

The results using this makefile are:

# NOTE(review): recipe lines must start with a tab in a real makefile - the
# single leading space below looks like a copy/paste artifact; confirm.

# Render the reST source to HTML, styled with style.css
%.html: %.txt
 rst2html.py --stylesheet style.css $< $@

# Compile the generated LaTeX file to PDF
%.pdf: %.tex
 pdflatex $<

# Convert the reST source to LaTeX
%.tex: %.txt
 rst2latex.py $< $@

# Default target: both output formats of the "hw" document
all: hw.html hw.pdf

# Remove the generated documents and the pdflatex by-products
clean:
 rm -f *.aux *.log *.pdf *.html *.out

# Rebuild everything from scratch
fresh: clean all

.PHONY: all clean fresh

HTML

PDF

Final Notes

There are other such tools: markdown, asciidoc and others. I happen to like reST.

I use the following stylesheets (can't remember the URL I got them from):
style.css

/*
:Author: Fred L. Drake, Jr.
:date: $Date$
:version: $Revision$

This stylesheet combines some ideas from the two stylesheets
distributed with docutils and enhances them for Zope 3 documentation.
*/

@import url(default.css);

div.document {
  margin: 0px 1em 1em 4em;
  padding: 0px; }

div.document a {
  text-decoration: none; }

div.document a[href] {
  text-decoration: underline; }

div.document h1.title {
  background-image: url("zope3logo.gif");
  background-position: -6px -4px;
  background-repeat: no-repeat;
  font-size: 150%;
  min-height: 50px; }

div.document div.section {
  margin: 0px 0px 1.5em 0px; }

div.document div.section h1 {
  background-color: rgb(230,230,230);
  margin-left: -2em;
  padding: 0.2em;
  padding-left: 0.35em;
  padding-top: 0.35em;
  /* This grey underline make this more visually distinctive on LCD
     monitors, which often don't have enough contrast. */
  border-right: thin solid rgb(180,180,180);
  border-bottom: thin solid rgb(180,180,180); }

div.document div.section div.section div.section h3 {
  margin-bottom: -0.5em; }

div.document h1 {
  font-family: sans-serif;
  font-size: 135%; }

div.document h2 {
  font-family: sans-serif;
  font-size: 120%; }

div.document h3 {
  font-family: sans-serif;
  font-size: 105%; }

div.document h4 {
  font-family: sans-serif;
  font-size: 100%; }

div.document h5 {
  font-family: sans-serif;
  font-size: 100%; }

div.document h6 {
  font-family: sans-serif;
  font-style: italic;
  font-size: 100%; }

div.document hr {
  width: 75%; }

div.document .literal .pre {
  background-color: white;
  font-family: lucidatypewriter, "lucida typewriter", sans-serif; }

div.document .literal-block {
  border: thin solid rgb(180,180,180);
  font-family: lucidatypewriter, "lucida typewriter", monospace;
  font-size: 80%;
  padding: 0.5em; }

div.document table.table {
  margin-left: 2em;
  margin-right: 2em; }

div.document table.table thead {
  background-color: rgb(230,230,230); }

/* docutils uses the "option" class with both "col" and "span"
   elements, so we have to be explicit here */
div.document .option-list span.option {
  font-weight: bold; }

div.document .option-list kbd {
  font-family: inherit; }

default.css

/*
:Author: David Goodger
:Contact: goodger@users.sourceforge.net
:date: $Date$
:version: $Revision$
:copyright: This stylesheet has been placed in the public domain.

Default cascading style sheet for the HTML output of Docutils.
*/

div.document .first {
  margin-top: 0 }

div.document .last {
  margin-bottom: 0 }

div.document a.toc-backref {
  text-decoration: none ;
  color: black }

div.document dd {
  margin-bottom: 0.5em }

div.document div.abstract {
  margin: 2em 5em }

div.document div.abstract p.topic-title {
  font-weight: bold ;
  text-align: center }

div.document div.attention,
div.document div.caution,
div.document div.danger,
div.document div.error,
div.document div.hint,
div.document div.important,
div.document div.note,
div.document div.tip,
div.document div.warning,
div.document div.admonition {
  margin: 2em ;
  border: medium outset ;
  padding: 1em }

div.document div.attention p.admonition-title,
div.document div.caution p.admonition-title,
div.document div.danger p.admonition-title,
div.document div.error p.admonition-title,
div.document div.warning p.admonition-title {
  color: red ;
  font-weight: bold ;
  font-family: sans-serif }

div.document div.hint p.admonition-title,
div.document div.important p.admonition-title,
div.document div.note p.admonition-title,
div.document div.tip p.admonition-title,
div.document div.admonition p.admonition-title {
  font-weight: bold ;
  font-family: sans-serif }

div.document div.dedication {
  margin: 2em 5em ;
  text-align: center ;
  font-style: italic }

div.document div.dedication p.topic-title {
  font-weight: bold ;
  font-style: normal }

div.document div.figure {
  margin-left: 2em }

div.document div.footer,
div.document div.header {
  font-size: smaller }

div.document div.sidebar {
  margin-left: 1em ;
  border: medium outset ;
  padding: 0em 1em ;
  background-color: #ffffee ;
  width: 40% ;
  float: right ;
  clear: right }

div.document div.sidebar p.rubric {
  font-family: sans-serif ;
  font-size: medium }

div.document div.system-messages {
  margin: 5em }

div.document div.system-messages h1 {
  color: red }

div.document div.system-message {
  border: medium outset ;
  padding: 1em }

div.document div.system-message p.system-message-title {
  color: red ;
  font-weight: bold }

div.document div.topic {
  margin: 2em }

div.document h1.title {
  text-align: center }

div.document h2.subtitle {
  text-align: center }

div.document hr {
  width: 75% }

/* Both list kinds are scoped to div.document, like every other rule here */
div.document ol.simple,
div.document ul.simple {
  margin-bottom: 1em }

div.document ol.arabic {
  list-style: decimal }

div.document ol.loweralpha {
  list-style: lower-alpha }

div.document ol.upperalpha {
  list-style: upper-alpha }

div.document ol.lowerroman {
  list-style: lower-roman }

div.document ol.upperroman {
  list-style: upper-roman }

div.document p.attribution {
  text-align: right ;
  margin-left: 50% }

div.document p.caption {
  font-style: italic }

div.document p.credits {
  font-style: italic ;
  font-size: smaller }

div.document p.label {
  white-space: nowrap }

div.document p.rubric {
  font-weight: bold ;
  font-size: larger ;
  color: darkred ;
  text-align: center }

div.document p.sidebar-title {
  font-family: sans-serif ;
  font-weight: bold ;
  font-size: larger }

div.document p.sidebar-subtitle {
  font-family: sans-serif ;
  font-weight: bold }

div.document p.topic-title {
  font-weight: bold }

div.document pre.address {
  margin-bottom: 0 ;
  margin-top: 0 ;
  font-family: serif ;
  font-size: 100% }

div.document pre.line-block {
  font-family: serif ;
  font-size: 100% }

/* Both block kinds are scoped to div.document, like every other rule here */
div.document pre.literal-block,
div.document pre.doctest-block {
  margin-left: 2em ;
  margin-right: 2em ;
  background-color: #eeeeee }

div.document span.classifier {
  font-family: sans-serif ;
  font-style: oblique }

div.document span.classifier-delimiter {
  font-family: sans-serif ;
  font-weight: bold }

div.document span.interpreted {
  font-family: sans-serif }

div.document span.option {
  white-space: nowrap }

div.document span.option-argument {
  font-style: italic }

div.document span.pre {
  white-space: pre }

div.document span.problematic {
  color: red }

div.document table {
  margin-top: 0.5em ;
  margin-bottom: 0.5em }

div.document table.citation {
  border-left: solid thin gray ;
  padding-left: 0.5ex }

div.document table.docinfo {
  margin: 2em 4em }

div.document table.footnote {
  border-left: solid thin black ;
  padding-left: 0.5ex }

div.document td,
div.document th {
  padding-left: 0.5em ;
  padding-right: 0.5em ;
  vertical-align: top }

div.document th.docinfo-name,
div.document th.field-name {
  font-weight: bold ;
  text-align: left ;
  white-space: nowrap }

div.document h1 tt,
div.document h2 tt,
div.document h3 tt,
div.document h4 tt,
div.document h5 tt,
div.document h6 tt {
  font-size: 100% }

div.document tt {
  background-color: #eeeeee }

div.document ul.auto-toc {
  list-style-type: none }

Thursday, May 17, 2007

Websession

A nice class for supporting sessions in CGI scripts.
Usage Example:

Monday, May 07, 2007

Avoiding Indent

To reduce the amount of indentation, you can negate your logic:


def positive_logic(x):
    """Call do_cool_stuff(g) when x > 2, y = f(x) < 10 and g = f(y) > 0.

    Written with nested positive conditions: every check adds one more
    level of indentation.
    """
    if x > 2:
        y = f(x)
        if y < 10:
            g = f(y)
            if g > 0:
                do_cool_stuff(g)

will become:


def negative_logic(x):
    """Call do_cool_stuff(g) when x > 2, y = f(x) < 10 and g = f(y) > 0.

    Written with negated guard conditions: each failed check returns
    early, so the code stays at a single indentation level.
    """
    if x <= 2:
        return
    y = f(x)
    if y >= 10:
        return
    g = f(y)
    if g <= 0:
        return
    do_cool_stuff(g)

Tuesday, April 24, 2007

Auto building C extension

Not as good as Perl's inline module; however, this little trick will build the C extension the first time the module is loaded. (Works on Linux, probably on Mac and Windows with development tools installed.)

Let's call our module autobuild, then in the autobuild directory we'll have:

__init__.py

def _build():
   """Build the C extension in place when it is missing or out of date.

   The extension is rebuilt when the shared library does not exist or is
   older than its C source. Raises ImportError when the build command
   exits with a non-zero status.
   """
   from os import devnull
   from os.path import abspath, dirname, getmtime, isfile, join
   from subprocess import STDOUT, call
   from sys import executable

   from setup import DYNLIB, SRC_FILE

   # abspath first: __file__ may be relative, and an empty dirname is not
   # a usable working directory.
   module_dir = dirname(abspath(__file__))

   def _run(args):
       # Run inside the module directory and discard all output. An
       # argument list (no shell) keeps paths with spaces or quotes intact.
       sink = open(devnull, "w")
       try:
           return call(args, cwd=module_dir, stdout=sink, stderr=STDOUT)
       finally:
           sink.close()

   full_src = join(module_dir, SRC_FILE)
   full_dynlib = join(module_dir, DYNLIB)

   if isfile(full_dynlib) and getmtime(full_dynlib) >= getmtime(full_src):
       return # Up to date, nothing to build

   # An explicit check instead of "assert": asserts are stripped under
   # "python -O", which would silently skip the build altogether.
   if _run([executable, "setup.py", "build_ext", "-i"]) != 0:
       raise ImportError("build error")

_build()
del _build 
from _greet import *

setup.py


from distutils.core import Extension, setup

# Name of the extension module; also the stem of its source and library files
MODULE_NAME = "_greet"
# NOTE(review): the ".so" suffix is hard-coded - presumably other platforms
# use a different one; confirm before relying on this outside Linux.
DYNLIB = MODULE_NAME + ".so"
SRC_FILE = MODULE_NAME + ".c"

# The guard lets other code import the constants above without running setup()
if __name__ == "__main__":
   setup(ext_modules=[Extension(MODULE_NAME, [SRC_FILE])])

_greet.c

/* Python.h must be included before any standard header. The file name is
   spelled with a capital "P", which matters on case sensitive file systems. */
#include <Python.h>
#include <stdio.h>

/* greet(name): print "Hello <name>" to stdout and return None. */
static PyObject *
greet_greet(PyObject *self, PyObject *args)
{
   const char *name;

   /* "s" converts the single string argument; on failure the exception is
      already set, so NULL is simply passed on to the interpreter. */
   if (!PyArg_ParseTuple(args, "s", &name)) {
       return NULL;
   }

   printf("Hello %s\n", name);

   Py_RETURN_NONE;
}

/* Method table of the module, terminated by an all-NULL sentinel entry. */
static PyMethodDef GreetMethods[] = {
   { "greet",  greet_greet, METH_VARARGS,
     "Print a friendly greeting."
   },
   {NULL, NULL, 0, NULL}        /* Sentinel */
};

/* Module initialization function, named "init" + module name. */
PyMODINIT_FUNC
init_greet(void)
{
   Py_InitModule("_greet", GreetMethods);
}

Friday, April 13, 2007

Using Multi-Line String for Regular Expressions

Python (like C) concatenates string literals that are separated only by white space.
This helps writing clearer regular expressions:

#!/usr/bin/env python
import re

# 2007-04-01 11:20
# Adjacent string literals are joined into one pattern. Raw strings keep
# the backslash escapes (\d, \s) intact for the regex engine.
find_time = re.compile(
   r"(?P<year>\d{4})"  # 4 digit year
   r"-"
   r"(?P<month>\d{2})"  # 2 digit month
   r"-"
   r"(?P<day>\d{2})"  # 2 digit day
   r"\s+"        # white space(s)
   r"(?P<hour>\d{2})"  # 2 digit hour
   r":"
   r"(?P<minute>\d{2})"  # 2 digit minute
   ).search

match = find_time("The exact time is 2007-04-01  11:20.")
assert match, "can't find time"
print("MONTH: %s" % match.group("month"))

Friday, March 30, 2007

HTML Entities

A quick way to see how all HTML entities are displayed in your browser:


from urllib import urlopen
import re
import webbrowser

W3_URL = "http://www.w3.org/TR/WD-html40-970708/sgml/entities.html"
FILE_NAME = "/tmp/html-entities.html"
# Raw string, so that "\s" reaches the regex engine untouched
find_entity = re.compile(r"!ENTITY\s+([A-Za-z][A-Za-z0-9]+)").search

# One table row per entity declaration found in the W3C page: the entity
# name next to the entity itself, as rendered by the browser.
fo = open(FILE_NAME, "wt")
try: # Close the file even when the download fails half way
    fo.write("<html><body><table border=\"1\">\n")

    for line in urlopen(W3_URL):
        match = find_entity(line)
        if match:
            entity = match.group(1)
            fo.write("<tr><td>%s</td><td>&%s;</td></tr>\n" % (entity, entity))
    fo.write("</table></body></html>\n")
finally:
    fo.close()

webbrowser.open(FILE_NAME)

Say "NO" to Internet Violence

I'll make an exception for Kathy Sierra, and post a non-technical entry.

Just say "NO" to any violence on the internet, make it a better place for all of us.

Kathy, I hope you'll find the strength to overcome this.

Tuesday, March 27, 2007

Pushing Data - The Easy Way

One of the fastest ways to implement "pushing data to a server" is to have a CGI script on the server and push data to it from the clients.

This way you don't need to write a server, design a protocol, ... Just use an existing HTTP server (such as lighttpd) with CGI.

CGI Script:

#!/usr/bin/env python

from cgi import FieldStorage
from myapp import do_something_with_data

ERROR = "<html><body>Error: %s</body></html>"

def _fail(message):
   """Send the error page to the client and exit with a failure status."""
   from cgi import escape

   # The page has to go to stdout to reach the client: a string passed to
   # SystemExit is written to stderr instead. Escaping keeps the message
   # from being interpreted as markup.
   print(ERROR % escape(str(message)))
   raise SystemExit(1)

def main():
   """Handle one request: pass the "key" and "data" form fields to the application.

   Replies with an OK page on success and with an error page when a field
   is missing or the application raises.
   """
   print("Content-Type: text/html")
   print("")

   form = FieldStorage()
   data = form.getvalue("data", "")
   key = form.getvalue("key", "").strip()
   if not (key and data):
       _fail("NO 'key' or 'data'")

   try:
       do_something_with_data(key, data)
   except Exception:
       # Top level boundary: report any application failure to the client
       import sys
       _fail(sys.exc_info()[1])

   print("<html><body>OK</body></html>")

if __name__ == "__main__":
 main()

"Pushing" script:

#!/usr/bin/env python

from urllib import urlopen, urlencode

CGI_URL = "http://localhost:8080/load.cgi"
def push_data(key, data):
   """Send data, stored under key, to the CGI script at CGI_URL.

   Returns True on success and False when the request fails. Failures are
   reported on stderr instead of being silently dropped.
   """
   import sys

   query = urlencode([("data", data), ("key", key)])
   try:
       response = urlopen(CGI_URL, query)
       try:
           response.read()
       finally:
           response.close()
   except IOError:
       error = sys.exc_info()[1] # Avoids version specific "except" syntax
       sys.stderr.write("error: can't push %s (%s)\n" % (key, error))
       return False

   return True

def main(argv=None):
   """Command line entry point: push FILENAME to the server.

   The file content is pushed under the base name of the file. argv
   defaults to sys.argv. Leaves through SystemExit on a wrong number of
   arguments or when FILENAME is not an existing file.
   """
   if argv is None:
       import sys
       argv = sys.argv

   from optparse import OptionParser
   from os.path import isfile, basename

   parser = OptionParser("usage: %prog FILENAME")

   opts, args = parser.parse_args(argv[1:])
   if len(args) != 1:
       parser.error("wrong number of arguments") # Will exit

   filename = args[0]
   if not isfile(filename):
       raise SystemExit("error: can't find %s" % filename)

   key = basename(filename)
   # Close the file explicitly instead of leaving it to garbage collection
   fo = open(filename, "rb")
   try:
       data = fo.read()
   finally:
       fo.close()

   push_data(key, data)


if __name__ == "__main__":
   main()

(Thanks to Martin for the idea)

Wednesday, March 21, 2007

`defaultdict`

Python 2.5 has a defaultdict dictionary in the collections
module.
defaultdict takes a factory function in the constructor. This function
will create the default value each time you try to get a missing item.

Then you can write a word histogram function like this:

from collections import defaultdict
def histogram(text):
   """Count how often each whitespace separated word appears in text.

   Returns a defaultdict(int) mapping word -> number of occurrences.
   """
   counts = defaultdict(int) # a missing word starts at int() == 0
   for token in text.split():
       counts[token] = counts[token] + 1
   return counts

Or, if you want to store the location of the words as well

def histogram(text):
   """Map each word in text to the list of positions where it occurs.

   Positions are zero based indexes into text.split().
   Returns a defaultdict(list).
   """
   positions = defaultdict(list) # a missing word starts at list() == []
   for index, token in enumerate(text.split()):
       positions[token].append(index)
   return positions