diff options
author | murphy <murphy@rubychan.de> | 2009-04-14 04:03:40 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2009-04-14 04:03:40 +0000 |
commit | 70caebd582ffbd61ff0066911d80fb6afd960bdd (patch) | |
tree | 4d8c01f8e13b83b8610d83979824547fec580a95 /test/scanners/python/pleac.in.py | |
parent | d0e95e2cb7e1d7336a850178bb27ab98bc5b4ef0 (diff) | |
download | coderay-70caebd582ffbd61ff0066911d80fb6afd960bdd.tar.gz |
New: *Python Scanner* (feature #41)
* Based on Pygments' implementation.
* The goal is to highlight both Python 2 and 3.
** print and exec will need special treatment.
* Still needs work; see TODOs in the code.
* Example files: PLEAC, pygments.
** The pygments.unistring code has very long lines which bother TextMate,
so I put this one in a separate file.
* Rating: 1 / 5 stars, "Poor support", because of problems with numeric literals.
* Added .py, .pyw and .py3 to FileType.
Other changes:
* Sorted lines in FileType::TypeFromExt.
Diffstat (limited to 'test/scanners/python/pleac.in.py')
-rw-r--r-- | test/scanners/python/pleac.in.py | 11119 |
1 files changed, 11119 insertions, 0 deletions
diff --git a/test/scanners/python/pleac.in.py b/test/scanners/python/pleac.in.py new file mode 100644 index 0000000..6b9a79b --- /dev/null +++ b/test/scanners/python/pleac.in.py @@ -0,0 +1,11119 @@ +# -*- python -*- +# vim:set ft=python: + +# @@PLEAC@@_NAME +# @@SKIP@@ Python + +# @@PLEAC@@_WEB +# @@SKIP@@ http://www.python.org + +# @@PLEAC@@_INTRO +# @@SKIP@@ The latest version of Python is 2.4 but users of 2.3 and 2.2 (and +# @@SKIP@@ in some cases earlier versions) can use the code herein. +# @@SKIP@@ Users of 2.2 and 2.3 should install or copy code from utils.py +# @@SKIP@@ (http://aima.cs.berkeley.edu/python/utils.py) +# @@SKIP@@ [the first section provides compatability code with 2.4] +# @@SKIP@@ Users of 2.2 should install optik (http://optik.sourceforge.com) +# @@SKIP@@ [for optparse and textwrap] +# @@SKIP@@ Where a 2.3 or 2.4 feature is unable to be replicated, an effort +# @@SKIP@@ has been made to provide a backward-compatible version in addition +# @@SKIP@@ to one using modern idioms. +# @@SKIP@@ Examples which translate the original Perl closely but which are +# @@SKIP@@ unPythonic are prefixed with a comment stating "DON'T DO THIS". +# @@SKIP@@ In some cases, it may be useful to know the techniques in these, +# @@SKIP@@ though it's a bad solution for the specific problem. + +# @@PLEAC@@_1.0 +#----------------------------- +mystr = "\n" # a newline character +mystr = r"\n" # two characters, \ and n +#----------------------------- +mystr = "Jon 'Maddog' Orwant" # literal single quote inside double quotes +mystr = 'Jon "Maddog" Orwant' # literal double quote inside single quotes +#----------------------------- +mystr = 'Jon \'Maddog\' Orwant' # escaped single quote +mystr = "Jon \"Maddog\" Orwant" # escaped double quote +#----------------------------- +mystr = """ +This is a multiline string literal +enclosed in triple double quotes. +""" +mystr = ''' +And this is a multiline string literal +enclosed in triple single quotes. 
+''' +#----------------------------- + +# @@PLEAC@@_1.1 +#----------------------------- + +# get a 5-char string, skip 3, then grab 2 8-char strings, then the rest +# Note that struct.unpack cannot use * for an unknown length. +# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65224 +import struct +(lead, s1, s2), tail = struct.unpack("5s 3x 8s 8s", data[:24]), data[24:] + +# split at five-char boundaries +fivers = struct.unpack("5s" * (len(data)//5), data) +fivers = print [x[i*5:i*5+5] for i in range(len(x)/5)] + +# chop string into individual characters +chars = list(data) +#----------------------------- +mystr = "This is what you have" +# +012345678901234567890 Indexing forwards (left to right) +# 109876543210987654321- Indexing backwards (right to left) +# note that 0 means 10 or 20, etc. above + +first = mystr[0] # "T" +start = mystr[5:7] # "is" +rest = mystr[13:] # "you have" +last = mystr[-1] # "e" +end = mystr[-4:] # "have" +piece = mystr[-8:-5] # "you" +#----------------------------- +# Python strings are immutable. 
+# In general, you should just do piecemeal reallocation: +mystr = "This is what you have" +mystr = mystr[:5] + "wasn't" + mystr[7:] + +# Or replace and reallocate +mystr = "This is what you have" +mystr = mystr.replace(" is ", " wasn't ") + +# DON'T DO THIS: In-place modification could be done using character arrays +import array +mystr = array.array("c", "This is what you have") +mystr[5:7] = array.array("c", "wasn't") +# mystr is now array('c', "This wasn't what you have") + +# DON'T DO THIS: It could also be done using MutableString +from UserString import MutableString +mystr = MutableString("This is what you have") +mystr[-12:] = "ondrous" +# mystr is now "This is wondrous" +#----------------------------- +# you can test simple substrings with "in" (for regex matching see ch.6): +if txt in mystr[-10:]: + print "'%s' found in last 10 characters"%txt + +# Or use the startswith() and endswith() string methods: +if mystr.startswith(txt): + print "%s starts with %s."%(mystr, txt) +if mystr.endswith(txt): + print "%s ends with %s."%(mystr, txt) + +#----------------------------- + +# @@PLEAC@@_1.2 +#----------------------------- +# Introductory Note: quite a bit of this section is not terribly Pythonic +# as names must be set before being used. 
For instance, unless myvar has +# been previously defined, these next lines will all raise NameError: +myvar = myvar or some_default +myvar2 = myvar or some_default +myvar |= some_default # bitwise-or, not logical-or - for demo + +# The standard way of setting a default is often: +myvar = default_value +if some_condition: + pass # code which may set myvar to something else + +# if myvar is returned from a function and may be empty/None, then use: +myvar = somefunc() +if not myvar: + myvar = default_value + +# If you want a default value that can be overridden by the person calling +# your code, you can often wrap it in a function with a named parameter: +def myfunc(myvar="a"): + return myvar + "b" +print myfunc(), myfunc("c") +#=> ab cb + +# Note, though, that this won't work for mutable objects such as lists or +# dicts that are mutated in the function as the object is only created once +# and repeated calls to the same function will return the same object. This +# can be desired behaviour however - see section 10.3, for instance. 
+def myfunc(myvar=[]): + myvar.append("x") + return myvar +print myfunc(), myfunc() +#=> ['x'] ['x', 'x'] + +# You need to do: +def myfunc(myvar=None): + if myvar is None: + myvar = [] + myvar.append("x") + return myvar +print myfunc(), myfunc() +#=> ['x'] ['x'] + +#=== Perl Equivalencies start here +# use b if b is true, otherwise use c +a = b or c + +# as that is a little tricksy, the following may be preferred: +if b: + a = b +else: + a = c + +# set x to y unless x is already true +if not x: + x = y +#----------------------------- +# use b if b is defined, else c +try: + a = b +except NameError: + a = c +#----------------------------- +foo = bar or "DEFAULT VALUE" +#----------------------------- +# To get a user (for both UNIX and Windows), use: +import getpass +user = getpass.getuser() + +# DON'T DO THIS: find the user name on Unix systems +import os +user = os.environ.get("USER") +if user is None: + user = os.environ.get("LOGNAME") +#----------------------------- +if not starting_point: + starting_point = "Greenwich" +#----------------------------- +if not a: # copy only if empty + a = b + +if b: # assign b if nonempty, else c + a = b +else: + a = c +#----------------------------- + +# @@PLEAC@@_1.3 +#----------------------------- +v1, v2 = v2, v1 +#----------------------------- +# DON'T DO THIS: +temp = a +a = b +b = temp +#----------------------------- +a = "alpha" +b = "omega" +a, b = b, a # the first shall be last -- and versa vice +#----------------------------- +alpha, beta, production = "January March August".split() +alpha, beta, production = beta, production, alpha +#----------------------------- + +# @@PLEAC@@_1.4 +#----------------------------- +num = ord(char) +char = chr(num) +#----------------------------- +char = "%c" % num +print "Number %d is character %c" % (num, num) +print "Number %(n)d is character %(n)c" % {"n": num} +print "Number %(num)d is character %(num)c" % locals() +#=> Number 101 is character e +#----------------------------- 
+ascii_character_numbers = [ord(c) for c in "sample"] +print ascii_character_numbers +#=> [115, 97, 109, 112, 108, 101] + +word = "".join([chr(n) for n in ascii_character_numbers]) +word = "".join([chr(n) for n in [115, 97, 109, 112, 108, 101]]) +print word +#=> sample +#----------------------------- +hal = "HAL" +ibm = "".join([chr(ord(c)+1) for c in hal]) # add one to each ASCII value +print ibm +#=> IBM +#----------------------------- + +# @@PLEAC@@_1.5 +#----------------------------- +mylist = list(mystr) +#----------------------------- +for char in mystr: + pass # do something with char +#----------------------------- +mystr = "an apple a day" +uniq = sorted(set(mystr)) +print "unique chars are: '%s'" % "".join(uniq) +#=> unique chars are: ' adelnpy' +#----------------------------- +ascvals = [ord(c) for c in mystr] +print "total is %s for '%s'."%(sum(ascvals), mystr) +#=> total is 1248 for 'an apple a day'. +#----------------------------- +# sysv checksum +def checksum(myfile): + values = [ord(c) for line in myfile for c in line] + return sum(values)%(2**16) - 1 + +import fileinput +print checksum(fileinput.input()) # data from sys.stdin + +# Using a function means any iterable can be checksummed: +print checksum(open("C:/test.txt") # data from file +print checksum("sometext") # data from string +#----------------------------- +#!/usr/bin/python +# slowcat - emulate a s l o w line printer +# usage: slowcat [- DELAY] [files ...] 
+import sys, select +import re +DELAY = 1 +if re.match("^-\d+$",sys.argv[1]): + DELAY=-int(sys.argv[1]) + del sys.argv[1] +for ln in fileinput.input(): + for c in ln: + sys.stdout.write(c) + sys.stdout.flush() + select.select([],[],[], 0.005 * DELAY) +#----------------------------- + +# @@PLEAC@@_1.6 +#----------------------------- +# 2.3+ only +revchars = mystr[::-1] # extended slice - step is -1 +revwords = " ".join(mystr.split(" ")[::-1]) + +# pre 2.3 version: +mylist = list(mystr) +mylist.reverse() +revbytes = "".join(mylist) + +mylist = mystr.split() +mylist.reverse() +revwords = ' '.join(mylist) + +# Alternative version using reversed(): +revchars = "".join(reversed(mystr)) +revwords = " ".join(reversed(mystr.split(" "))) + +# reversed() makes an iterator, which means that the reversal +# happens as it is consumed. This means that "print reversed(mystr)" is not +# the same as mystr[::-1]. Standard usage is: +for char in reversed(mystr): + pass # ... do something +#----------------------------- +# 2.3+ only +word = "reviver" +is_palindrome = (word == word[::-1]) +#----------------------------- +# Generator version +def get_palindromes(fname): + for line in open(fname): + word = line.rstrip() + if len(word) > 5 and word == word[::-1]: + yield word +long_palindromes = list(get_palindromes("/usr/share/dict/words")) + +# Simpler old-style version using 2.2 string reversal +def rev_string(mystr): + mylist = list(mystr) + mylist.reverse() + return "".join(mylist) + +long_palindromes=[] +for line in open("/usr/share/dict/words"): + word = line.rstrip() + if len(word) > 5 and word == rev_string(word): + long_palindromes.append(word) +print long_palindromes +#----------------------------- + +# @@PLEAC@@_1.7 +#----------------------------- +mystr.expandtabs() +mystr.expandtabs(4) +#----------------------------- + +# @@PLEAC@@_1.8 +#----------------------------- +text = "I am %(rows)s high and %(cols)s long"%{"rows":24, "cols":80) +print text +#=> I am 24 high and 80 
long + +rows, cols = 24, 80 +text = "I am %(rows)s high and %(cols)s long"%locals() +print text +#=> I am 24 high and 80 long +#----------------------------- +import re +print re.sub("\d+", lambda i: str(2 * int(i.group(0))), "I am 17 years old") +#=> I am 34 years old +#----------------------------- +# expand variables in text, but put an error message in +# if the variable isn't defined +class SafeDict(dict): + def __getitem__(self, key): + return self.get(key, "[No Variable: %s]"%key) + +hi = "Hello" +text = "%(hi)s and %(bye)s!"%SafeDict(locals()) +print text +#=> Hello and [No Variable: bye]! + +#If you don't need a particular error message, just use the Template class: +from string import Template +x = Template("$hi and $bye!") +hi = "Hello" +print x.safe_substitute(locals()) +#=> Hello and $bye! +print x.substitute(locals()) # will throw a KeyError + +#----------------------------- + +# @@PLEAC@@_1.9 +#----------------------------- +mystr = "bo peep".upper() # BO PEEP +mystr = mystr.lower() # bo peep +mystr = mystr.capitalize() # Bo peep +#----------------------------- +beast = "python" +caprest = beast.capitalize().swapcase() # pYTHON +#----------------------------- +print "thIS is a loNG liNE".title() +#=> This Is A Long Line +#----------------------------- +if a.upper() == b.upper(): + print "a and b are the same" +#----------------------------- +import random +def randcase_one(letter): + if random.randint(0,5): # True on 1, 2, 3, 4 + return letter.lower() + else: + return letter.upper() + +def randcase(myfile): + for line in myfile: + yield "".join(randcase_one(letter) for letter in line[:-1]) + +for line in randcase(myfile): + print line +#----------------------------- + +# @@PLEAC@@_1.10 +#----------------------------- +"I have %d guanacos." % (n + 1) +print "I have", n+1, "guanacos." 
+#----------------------------- +#Python templates disallow in-string calculations (see PEP 292) +from string import Template + +email_template = Template("""\ +To: $address +From: Your Bank +CC: $cc_number +Date: $date + +Dear $name, + +Today you bounced check number $checknum to us. +Your account is now closed. + +Sincerely, +the management +""") + +import random +import datetime + +person = {"address":"Joe@somewhere.com", + "name": "Joe", + "cc_number" : 1234567890, + "checknum" : 500+random.randint(0,99)} + +print email_template.substitute(person, date=datetime.date.today()) +#----------------------------- + +# @@PLEAC@@_1.11 +#----------------------------- +# indenting here documents +# +# in python multiline strings can be used as here documents +var = """ + your text + goes here + """ + +# using regular expressions +import re +re_leading_blanks = re.compile("^\s+",re.MULTILINE) +var1 = re_leading_blanks.sub("",var)[:-1] + +# using string methods +# split into lines, use every line except first and last, left strip and rejoin. +var2 = "\n".join([line.lstrip() for line in var.split("\n")[1:-1]]) + +poem = """ + Here's your poem: + Now far ahead the Road has gone, + And I must follow, if I can, + Pursuing it with eager feet, + Until it joins some larger way + Where many paths and errand meet. + And whither then? I cannot say. + --Bilbo in /usr/src/perl/pp_ctl.c + """ + +import textwrap +print textwrap.dedent(poem)[1:-1] +#----------------------------- + + +# @@PLEAC@@_1.12 +#----------------------------- +from textwrap import wrap +output = wrap(para, + initial_indent=leadtab + subsequent_indent=nexttab) +#----------------------------- +#!/usr/bin/env python +# wrapdemo - show how textwrap works + +txt = """\ +Folding and splicing is the work of an editor, +not a mere collection of silicon +and +mobile electrons! 
+""" + +from textwrap import TextWrapper + +wrapper = TextWrapper(width=20, + initial_indent=" "*4, + subsequent_indent=" "*2) + +print "0123456789" * 2 +print wrapper.fill(txt) + +#----------------------------- +"""Expected result: + +01234567890123456789 + Folding and + splicing is the + work of an editor, + not a mere + collection of + silicon and mobile + electrons! +""" + +#----------------------------- +# merge multiple lines into one, then wrap one long line + +from textwrap import fill +import fileinput + +print fill("".join(fileinput.input())) + +#----------------------------- +# Term::ReadKey::GetTerminalSize() isn't in the Perl standard library. +# It isn't in the Python standard library either. Michael Hudson's +# recipe from python-list #530228 is shown here. +# (http://aspn.activestate.com/ASPN/Mail/Message/python-list/530228) +# Be aware that this will work on Unix but not on Windows. + +from termwrap import wrap +import struct, fcntl +def getheightwidth(): + height, width = struct.unpack( + "hhhh", fcntl.ioctl(0, TERMIOS.TIOCGWINSZ ,"\000"*8))[0:2] + return height, width + +# PERL <>, $/, $\ emulation +import fileinput +import re + +_, width = getheightwidth() +for para in re.split(r"\n{2,}", "".join(fileinput.input())): + print fill(para, width) + + +# @@PLEAC@@_1.13 +#----------------------------- +mystr = '''Mom said, "Don't do that."''' #" +re.sub("['\"]", lambda i: "\\" + i.group(0), mystr) +re.sub("[A-Z]", lambda i: "\\" + i.group(0), mystr) +re.sub("\W", lambda i: "\\" + i.group(0), "is a test!") # no function like quotemeta? 
+ + +# @@PLEAC@@_1.14 +#----------------------------- +mystr = mystr.lstrip() # left +mystr = mystr.rstrip() # right +mystr = mystr.strip() # both ends + + +# @@PLEAC@@_1.15 +#----------------------------- +import csv +def parse_csv(line): + reader = csv.reader([line], escapechar='\\') + return reader.next() + +line = '''XYZZY,"","O'Reilly, Inc","Wall, Larry","a \\"glug\\" bit,",5,"Error, Core Dumped,",''' #" + +fields = parse_csv(line) + +for i, field in enumerate(fields): + print "%d : %s" % (i, field) + +# pre-2.3 version of parse_csv +import re +def parse_csv(text): + pattern = re.compile('''"([^"\\\]*(?:\\\.[^"\\\]*)*)",?|([^,]+),?|,''') + mylist = ["".join(elem) + for elem in re.findall(pattern, text)] + if text[-1] == ",": + mylist += [''] + return mylist + +# cvs.reader is meant to work for many lines, something like: +# (NB: in Python default, quotechar is *not* escaped by backslash, +# but doubled instead. That's what Excel does.) +for fields in cvs.reader(lines, dialect="some"): + for num, field in enumerate(fields): + print num, ":", field +#----------------------------- + +# @@PLEAC@@_1.16 +#----------------------------- +def soundex(name, len=4): + """ soundex module conforming to Knuth's algorithm + implementation 2000-12-24 by Gregory Jorgensen + public domain + """ + + # digits holds the soundex values for the alphabet + digits = '01230120022455012623010202' + sndx = '' + fc = '' + + # translate alpha chars in name to soundex digits + for c in name.upper(): + if c.isalpha(): + if not fc: + fc = c # remember first letter + d = digits[ord(c)-ord('A')] + # duplicate consecutive soundex digits are skipped + if not sndx or (d != sndx[-1]): + sndx += d + + # replace first digit with first alpha character + sndx = fc + sndx[1:] + + # remove all 0s from the soundex code + sndx = sndx.replace('0','') + + # return soundex code padded to len characters + return (sndx + (len * '0'))[:len] + +user = raw_input("Lookup user: ") +if user == "": + raise SystemExit 
+ +name_code = soundex(user) +for line in open("/etc/passwd"): + line = line.split(":") + for piece in line[4].split(): + if name_code == soundex(piece): + print "%s: %s\n" % line[0], line[4]) +#----------------------------- + +# @@PLEAC@@_1.17 +#----------------------------- +import sys, fileinput, re + +data = """\ +analysed => analyzed +built-in => builtin +chastized => chastised +commandline => command-line +de-allocate => deallocate +dropin => drop-in +hardcode => hard-code +meta-data => metadata +multicharacter => multi-character +multiway => multi-way +non-empty => nonempty +non-profit => nonprofit +non-trappable => nontrappable +pre-define => predefine +preextend => pre-extend +re-compiling => recompiling +reenter => re-enter +turnkey => turn-key +""" +mydict = {} +for line in data.split("\n"): + if not line.strip(): + continue + k, v = [word.strip() for word in line.split("=>")] + mydict[k] = v +pattern_text = "(" + "|".join([re.escape(word) for word in mydict.keys()]) + ")" +pattern = re.compile(pattern_text) + +args = sys.argv[1:] +verbose = 0 +if args and args[0] == "-v": + verbose = 1 + args = args[1:] + +if not args: + sys.stderr.write("%s: Reading from stdin\n" % sys.argv[0]) + +for line in fileinput.input(args, inplace=1, backup=".orig"): + output = "" + pos = 0 + while True: + match = pattern.search(line, pos) + if not match: + output += line[pos:] + break + output += line[pos:match.start(0)] + mydict[match.group(1)] + pos = match.end(0) + sys.stdout.write(output) +#----------------------------- + +# @@PLEAC@@_1.18 +#----------------------------- +#!/usr/bin/python +# psgrep - print selected lines of ps output by +# compiling user queries into code. 
+# +# examples : +# psgrep "uid<10" +import sys, os, re + +class PsLineMatch: + # each field from the PS header + fieldnames = ("flags","uid","pid","ppid","pri","nice","size", \ + "rss","wchan","stat","tty","time","command") + numeric_fields = ("flags","uid","pid","ppid","pri","nice","size","rss") + def __init__(self): + self._fields = {} + + def new_line(self, ln): + self._ln = ln.rstrip() + # ps header for option "wwaxl" (different than in the perl code) + """ + F UID PID PPID PRI NI VSZ RSS WCHAN STAT TTY TIME COMMAND" + 004 0 1 0 15 0 448 236 schedu S ? 0:07 init" + . . . . . . . . . . . . . + """ + # because only the last entry might contain blanks, splitting + # is safe + data = self._ln.split(None,12) + for fn, elem in zip(self.fieldnames, data): + if fn in self.numeric_fields: # make numbers integer + self._fields[fn] = int(elem) + else: + self._fields[fn] = elem + + def set_query(self, args): + # assume args: "uid==500", "command ~ ^wm" + conds=[] + m = re.compile("(\w+)([=<>]+)(.+)") + for a in args: + try: + (field,op,val) = m.match(a).groups() + except: + print "can't understand query \"%s\"" % (a) + raise SystemExit + if field in self.numeric_fields: + conds.append(a) + else: + conds.append("%s%s'%s'",(field,op,val)) + self._desirable = compile("(("+")and(".join(conds)+"))", "<string>","eval") + + def is_desirable(self): + return eval(self._desirable, {}, self._fields) + + def __str__(self): + # to allow "print". + return self._ln + +if len(sys.argv)<=1: + print """usage: %s criterion ... 
+ Each criterion is a Perl expression involving: + %s + All criteria must be met for a line to be printed.""" \ + % (sys.argv[0], " ".join(PsLineMatch().fieldnames)) + raise SystemExit + +psln = PsLineMatch() +psln.set_query(sys.argv[1:]) +p = os.popen("ps wwaxl") +print p.readline()[:-1] # emit header line +for ln in p.readlines(): + psln.new_line(ln) + if psln.is_desirable(): + print psln +p.close() + +# alternatively one could consider every argument being a string and +# support wildcards: "uid==500" "command~^wm" by means of re, but this +# does not show dynamic python code generation, although re.compile +# also precompiles. +#----------------------------- + + +# @@PLEAC@@_2.1 +#----------------------------- +# The standard way of validating numbers is to convert them and catch +# an exception on failure + +try: + myfloat = float(mystr) + print "is a decimal number" +except TypeError: + print "is not a decimal number" + +try: + myint = int(mystr) + print "is an integer" +except TypeError: + print "is not an integer" + +# DON'T DO THIS. 
Explicit checking is prone to errors: +if mystr.isdigit(): # Fails on "+4" + print 'is a positive integer' +else: + print 'is not' + +if re.match("[+-]?\d+$", mystr): # Fails on "- 1" + print 'is an integer' +else: + print 'is not' + +if re.match("-?(?:\d+(?:\.\d*)?|\.\d+)$", mystr): # Opaque, and fails on "- 1" + print 'is a decimal number' +else: + print 'is not' + +#----------------------------- + +# @@PLEAC@@_2.2 +#----------------------------- +# equal(num1, num2, accuracy) : returns true if num1 and num2 are +# equal to accuracy number of decimal places + +def equal(num1, num2, accuracy): + return abs(num1 - num2) < 10**(-accuracy) +#----------------------------- +from __future__ import division # use / for float div and // for int div + +wage = 536 # $5.36/hour +week = 40 * wage # $214.40 +print "One week's wage is: $%.2f" % (week/100) +#=> One week's wage is: $214.40 +#----------------------------- + +# @@PLEAC@@_2.3 +#----------------------------- +rounded = round(num) # rounds to integer +#----------------------------- +a = 0.255 +b = "%.2f" % a +print "Unrounded: %f\nRounded: %s" % (a, b) +print "Unrounded: %f\nRounded: %.2f" % (a, a) +#=> Unrounded: 0.255000 +#=> Rounded: 0.26 +#=> Unrounded: 0.255000 +#=> Rounded: 0.26 +#----------------------------- +from math import floor, ceil + +print "number\tint\tfloor\tceil" +a = [3.3, 3.5, 3.7, -3.3] +for n in a: + print "% .1f\t% .1f\t% .1f\t% .1f" % (n, int(n), floor(n), ceil(n)) +#=> number int floor ceil +#=> 3.3 3.0 3.0 4.0 +#=> 3.5 3.0 3.0 4.0 +#=> 3.7 3.0 3.0 4.0 +#=> -3.3 -3.0 -4.0 -3.0 +#----------------------------- + +# @@PLEAC@@_2.4 +#----------------------------- +# To convert a string in any base up to base 36, use the optional arg to int(): +num = int('0110110', 2) # num is 54 + +# To convert an int to an string representation in another base, you could use +# <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/111286>: +import baseconvert +def dec2bin(i): + return baseconvert.baseconvert(i, 
baseconvert.BASE10, baseconvert.BASE2) + +binstr = dec2bin(54) # binstr is 110110 +#----------------------------- + +# @@PLEAC@@_2.5 +#----------------------------- +for i in range(x,y): + pass # i is set to every integer from x to y, excluding y + +for i in range(x, y, 7): + pass # i is set to every integer from x to y, stepsize = 7 + +print "Infancy is:", +for i in range(0,3): + print i, +print + +print "Toddling is:", +for i in range(3,5): + print i, +print + +# DON'T DO THIS: +print "Childhood is:", +i = 5 +while i <= 12: + print i + i += 1 + +#=> Infancy is: 0 1 2 +#=> Toddling is: 3 4 +#=> Childhood is: 5 6 7 8 9 10 11 12 +#----------------------------- + +# @@PLEAC@@_2.6 +#----------------------------- +# See http://www.faqts.com/knowledge_base/view.phtml/aid/4442 +# for a module that does this +#----------------------------- + +# @@PLEAC@@_2.7 +#----------------------------- +import random # use help(random) to see the (large) list of funcs + +rand = random.randint(x, y) +#----------------------------- +rand = random.randint(25, 76) +print rand +#----------------------------- +elt = random.choice(mylist) +#----------------------------- +import string +chars = string.letters + string.digits + "!@$%^&*" +password = "".join([random.choice(chars) for i in range(8)]) +#----------------------------- + +# @@PLEAC@@_2.8 +#----------------------------- +# Changes the default RNG +random.seed() + +# Or you can create independent RNGs +gen1 = random.Random(6) +gen2 = random.Random(6) +gen3 = random.Random(10) +a1, b1 = gen1.random(), gen1.random() +a2, b2 = gen2.random(), gen2.random() +a3, b3 = gen3.random(), gen3.random() +# a1 == a2 and b1 == b2 +#----------------------------- + +# @@PLEAC@@_2.9 +#----------------------------- +# see http://www.sbc.su.se/~per/crng/ or http://www.frohne.westhost.com/rv11reference.htm +#----------------------------- + +# @@PLEAC@@_2.10 +#----------------------------- +import random +mean = 25 +sdev = 2 +salary = random.gauss(mean, 
sdev) +print "You have been hired at %.2f" % salary +#----------------------------- + +# @@PLEAC@@_2.11 +#----------------------------- +radians = math.radians(degrees) +degrees = math.degrees(radians) + +# pre-2.3: +from __future__ import division +import math +def deg2rad(degrees): + return (degrees / 180) * math.pi +def rad2deg(radians): + return (radians / math.pi) * 180 +#----------------------------- +# Use deg2rad instead of math.radians if you have pre-2.3 Python. +import math +def degree_sine(degrees): + radians = math.radians(degrees) + return math.sin(radians) +#----------------------------- + +# @@PLEAC@@_2.12 +#----------------------------- +import math + +# DON'T DO THIS. Use math.tan() instead. +def tan(theta): + return math.sin(theta) / math.cos(theta) +#---------------- +# NOTE: this sets y to 16331239353195370.0 +try: + y = math.tan(math.pi/2) +except ValueError: + y = None +#----------------------------- + +# @@PLEAC@@_2.13 +#----------------------------- +import math +log_e = math.log(VALUE) +#----------------------------- +log_10 = math.log10(VALUE) +#----------------------------- +def log_base(base, value): + return math.log(value) / math.log(base) +#----------------------------- +# log_base defined as above +answer = log_base(10, 10000) +print "log10(10,000) =", answer +#=> log10(10,000) = 4.0 +#----------------------------- + +# @@PLEAC@@_2.14 +#----------------------------- +# NOTE: must have NumPy installed. See +# http://www.pfdubois.com/numpy/ + +import Numeric +a = Numeric.array( ((3, 2, 3), + (5, 9, 8) ), "d") +b = Numeric.array( ((4, 7), + (9, 3), + (8, 1) ), "d") +c = Numeric.matrixmultiply(a, b) + +print c +#=> [[ 54. 30.] +#=> [ 165. 
70.]] + +print a.shape, b.shape, c.shape +#=> (2, 3) (3, 2) (2, 2) +#----------------------------- + +# @@PLEAC@@_2.15 +#----------------------------- +a = 3+5j +b = 2-2j +c = a * b +print "c =", c +#=> c = (16+4j) + +print c.real, c.imag, c.conjugate() +#=> 16.0 4.0 (16-4j) +#----------------------------- +import cmath +print cmath.sqrt(3+4j) +#=> (2+1j) +#----------------------------- + +# @@PLEAC@@_2.16 +#----------------------------- +number = int(hexadecimal, 16) +number = int(octal, 8) +s = hex(number) +s = oct(number) + +num = raw_input("Gimme a number in decimal, octal, or hex: ").rstrip() +if num.startswith("0x"): + num = int(num[2:], 16) +elif num.startswith("0"): + num = int(num[1:], 8) +else: + num = int(num) +print "%(num)d %(num)x %(num)o\n" % { "num": num } +#----------------------------- + + +# @@PLEAC@@_2.17 +#----------------------------- +def commify(amount): + amount = str(amount) + firstcomma = len(amount)%3 or 3 # set to 3 if would make a leading comma + first, rest = amount[:firstcomma], amount[firstcomma:] + segments = [first] + [rest[i:i+3] for i in range(0, len(rest), 3)] + return ",".join(segments) + +print commify(12345678) +#=> 12,345,678 + +# DON'T DO THIS. It works on 2.3+ only and is slower and less straightforward +# than the non-regex version above. +import re +def commify(amount): + amount = str(amount) + amount = amount[::-1] + amount = re.sub(r"(\d\d\d)(?=\d)(?!\d*\.)", r"\1,", amount) + return amount[::-1] + +# @@PLEAC@@_2.18 +# Printing Correct Plurals +#----------------------------- +def pluralise(value, root, singular="", plural="s"): + if value == 1: + return root + singular + else: + return root + plural + +print "It took", duration, pluralise(duration, 'hour') + +print "%d %s %s enough." 
% (duration, + pluralise(duration, 'hour'), + pluralise(duration, '', 'is', 'are')) +#----------------------------- +import re +def noun_plural(word): + endings = [("ss", "sses"), + ("([psc]h)", r"\1es"), + ("z", "zes"), + ("ff", "ffs"), + ("f", "ves"), + ("ey", "eys"), + ("y", "ies"), + ("ix", "ices"), + ("([sx])", r"\1es"), + ("", "s")] + for singular, plural in endings: + ret, found = re.subn("%s$"%singular, plural, word) + if found: + return ret + +verb_singular = noun_plural; # make function alias +#----------------------------- + +# @@PLEAC@@_2.19 +# Program: Calculating Prime Factors +#----------------------------- +#% bigfact 8 9 96 2178 +#8 2**3 +# +#9 3**2 +# +#96 2**5 3 +# +#2178 2 3**2 11**2 +#----------------------------- +#% bigfact 239322000000000000000000 +#239322000000000000000000 2**19 3 5**18 39887 +# +# +#% bigfact 25000000000000000000000000 +#25000000000000000000000000 2**24 5**26 +#----------------------------- +import sys + +def factorise(num): + factors = {} + orig = num + print num, '\t', + + # we take advantage of the fact that (i +1)**2 = i**2 + 2*i +1 + i, sqi = 2, 4 + while sqi <= num: + while not num%i: + num /= i + factors[i] = factors.get(i, 0) + 1 + + sqi += 2*i + 1 + i += 1 + + if num != 1 and num != orig: + factors[num] = factors.get(num, 0) + 1 + + if not factors: + print "PRIME" + + for factor in sorted(factors): + if factor: + tmp = str(factor) + if factors[factor]>1: tmp += "**" + str(factors[factor]) + print tmp, + print + +#-------- +if __name__ == '__main__': + if len(sys.argv) == 1: + print "Usage:", sys.argv[0], " number [number, ]" + else: + for strnum in sys.argv[1:]: + try: + num = int(strnum) + factorise(num) + except ValueError: + print strnum, "is not an integer" +#----------------------------- +# A more Pythonic variant (which separates calculation from printing): +def format_factor(base, exponent): + if exponent > 1: + return "%s**%s"%(base, exponent) + return str(base) + +def factorise(num): + factors = {} + orig 
= num + + # we take advantage of the fact that (i+1)**2 = i**2 + 2*i +1 + i, sqi = 2, 4 + while sqi <= num: + while not num%i: + num /= i + factors[i] = factors.get(i, 0) + 1 + sqi += 2*i + 1 + i += 1 + + if num not in (1, orig): + factors[num] = factors.get(num, 0) + 1 + + if not factors: + return ["PRIME"] + + out = [format_factor(base, exponent) + for base, exponent in sorted(factors.items())] + return out + +def print_factors(value): + try: + num = int(value) + if num != float(value): + raise ValueError + except (ValueError, TypeError): + raise ValueError("Can only factorise an integer") + factors = factorise(num) + print num, "\t", " ".join(factors) + +# @@PLEAC@@_3.0 +#----------------------------- +#introduction +# There are three common ways of manipulating dates in Python +# mxDateTime - a popular third-party module (not discussed here) +# time - a fairly low-level standard library module +# datetime - a new library module for Python 2.3 and used for most of these samples +# (I will use full names to show which module they are in, but you can also use +# from datetime import datetime, timedelta and so on for convenience) + +import time +import datetime + +print "Today is day", time.localtime()[7], "of the current year" +# Today is day 218 of the current year + +today = datetime.date.today() +print "Today is day", today.timetuple()[7], "of ", today.year +# Today is day 218 of 2003 + +print "Today is day", today.strftime("%j"), "of the current year" +# Today is day 218 of the current year + + +# @@PLEAC@@_3.1 +#----------------------------- +# Finding todays date + +today = datetime.date.today() +print "The date is", today +#=> The date is 2003-08-06 + +# the function strftime() (string-format time) produces nice formatting +# All codes are detailed at http://www.python.org/doc/current/lib/module-time.html +print t.strftime("four-digit year: %Y, two-digit year: %y, month: %m, day: %d") +#=> four-digit year: 2003, two-digit year: 03, month: 08, day: 06 + + +# 
@@PLEAC@@_3.2 +#----------------------------- +# Converting DMYHMS to Epoch Seconds +# To work with Epoch Seconds, you need to use the time module + +# For the local timezone +t = datetime.datetime.now() +print "Epoch Seconds:", time.mktime(t.timetuple()) +#=> Epoch Seconds: 1060199000.0 + +# For UTC +t = datetime.datetime.utcnow() +print "Epoch Seconds:", time.mktime(t.timetuple()) +#=> Epoch Seconds: 1060195503.0 + + +# @@PLEAC@@_3.3 +#----------------------------- +# Converting Epoch Seconds to DMYHMS + +now = datetime.datetime.fromtimestamp(EpochSeconds) +#or use datetime.datetime.utcfromtimestamp() +print now +#=> datetime.datetime(2003, 8, 6, 20, 43, 20) +print now.ctime() +#=> Wed Aug 6 20:43:20 2003 + +# or with the time module +oldtimetuple = time.localtime(EpochSeconds) +# oldtimetuple contains (year, month, day, hour, minute, second, weekday, yearday, daylightSavingAdjustment) +print oldtimetuple +#=> (2003, 8, 6, 20, 43, 20, 2, 218, 1) + + +# @@PLEAC@@_3.4 +#----------------------------- +# Adding to or Subtracting from a Date +# Use the rather nice datetime.timedelta objects + +now = datetime.date(2003, 8, 6) +difference1 = datetime.timedelta(days=1) +difference2 = datetime.timedelta(weeks=-2) + +print "One day in the future is:", now + difference1 +#=> One day in the future is: 2003-08-07 + +print "Two weeks in the past is:", now + difference2 +#=> Two weeks in the past is: 2003-07-23 + +print datetime.date(2003, 8, 6) - datetime.date(2000, 8, 6) +#=> 1095 days, 0:00:00 + +#----------------------------- +birthtime = datetime.datetime(1973, 01, 18, 3, 45, 50) # 1973-01-18 03:45:50 + +interval = datetime.timedelta(seconds=5, minutes=17, hours=2, days=55) +then = birthtime + interval + +print "Then is", then.ctime() +#=> Then is Wed Mar 14 06:02:55 1973 + +print "Then is", then.strftime("%A %B %d %I:%M:%S %p %Y") +#=> Then is Wednesday March 14 06:02:55 AM 1973 + +#----------------------------- +when = datetime.datetime(1973, 1, 18) + 
datetime.timedelta(days=55) +print "Nat was 55 days old on:", when.strftime("%m/%d/%Y").lstrip("0") +#=> Nat was 55 days old on: 3/14/1973 + + +# @@PLEAC@@_3.5 +#----------------------------- +# Dates produce timedeltas when subtracted. + +diff = date2 - date1 +diff = datetime.date(year1, month1, day1) - datetime.date(year2, month2, day2) +#----------------------------- + +bree = datetime.datetime(1981, 6, 16, 4, 35, 25) +nat = datetime.datetime(1973, 1, 18, 3, 45, 50) + +difference = bree - nat +print "There were", difference, "minutes between Nat and Bree" +#=> There were 3071 days, 0:49:35 between Nat and Bree + +weeks, days = divmod(difference.days, 7) + +minutes, seconds = divmod(difference.seconds, 60) +hours, minutes = divmod(minutes, 60) + +print "%d weeks, %d days, %d:%d:%d" % (weeks, days, hours, minutes, seconds) +#=> 438 weeks, 5 days, 0:49:35 + +#----------------------------- +print "There were", difference.days, "days between Bree and Nat." +#=> There were 3071 days between bree and nat + + +# @@PLEAC@@_3.6 +#----------------------------- +# Day in a Week/Month/Year or Week Number + +when = datetime.date(1981, 6, 16) + +print "16/6/1981 was:" +print when.strftime("Day %w of the week (a %A). Day %d of the month (%B).") +print when.strftime("Day %j of the year (%Y), in week %W of the year.") + +#=> 16/6/1981 was: +#=> Day 2 of the week (a Tuesday). Day 16 of the month (June). +#=> Day 167 of the year (1981), in week 24 of the year. + + +# @@PLEAC@@_3.7 +#----------------------------- +# Parsing Dates and Times from Strings + +time.strptime("Tue Jun 16 20:18:03 1981") +# (1981, 6, 16, 20, 18, 3, 1, 167, -1) + +time.strptime("16/6/1981", "%d/%m/%Y") +# (1981, 6, 16, 0, 0, 0, 1, 167, -1) +# strptime() can use any of the formatting codes from time.strftime() + +# The easiest way to convert this to a datetime seems to be; +now = datetime.datetime(*time.strptime("16/6/1981", "%d/%m/%Y")[0:5]) +# the '*' operator unpacks the tuple, producing the argument list. 
+ + +# @@PLEAC@@_3.8 +#----------------------------- +# Printing a Date +# Use datetime.strftime() - see helpfiles in distro or at python.org + +print datetime.datetime.now().strftime("The date is %A (%a) %d/%m/%Y") +#=> The date is Friday (Fri) 08/08/2003 + +# @@PLEAC@@_3.9 +#----------------------------- +# High Resolution Timers + +t1 = time.clock() +# Do Stuff Here +t2 = time.clock() +print t2 - t1 + +# 2.27236813618 +# Accuracy will depend on platform and OS, +# but time.clock() uses the most accurate timer it can + +time.clock(); time.clock() +# 174485.51365466841 +# 174485.55702610247 + +#----------------------------- +# Also useful; +import timeit +code = '[x for x in range(10) if x % 2 == 0]' +eval(code) +# [0, 2, 4, 6, 8] + +t = timeit.Timer(code) +print "10,000 repeats of that code takes:", t.timeit(10000), "seconds" +print "1,000,000 repeats of that code takes:", t.timeit(), "seconds" + +# 10,000 repeats of that code takes: 0.128238644856 seconds +# 1,000,000 repeats of that code takes: 12.5396490336 seconds + +#----------------------------- +import timeit +code = 'import random; l = random.sample(xrange(10000000), 1000); l.sort()' +t = timeit.Timer(code) + +print "Create a list of a thousand random numbers. Sort the list. Repeated a thousand times." +print "Average Time:", t.timeit(1000) / 1000 +# Time taken: 5.24391507859 + + +# @@PLEAC@@_3.10 +#----------------------------- +# Short Sleeps + +seconds = 3.1 +time.sleep(seconds) +print "boo" + +# @@PLEAC@@_3.11 +#----------------------------- +# Program HopDelta +# Save a raw email to disk and run "python hopdelta.py FILE" +# and it will process the headers and show the time taken +# for each server hop (nb: if server times are wrong, negative dates +# might appear in the output). + +import datetime, email, email.Utils +import os, sys, time + +def extract_date(hop): + # According to RFC822, the date will be prefixed with + # a semi-colon, and is the last part of a received + # header. 
+ date_string = hop[hop.find(';')+2:] + date_string = date_string.strip() + time_tuple = email.Utils.parsedate(date_string) + + # convert time_tuple to datetime + EpochSeconds = time.mktime(time_tuple) + dt = datetime.datetime.fromtimestamp(EpochSeconds) + return dt + +def process(filename): + # Main email file processing + # read the headers and process them + f = file(filename, 'rb') + msg = email.message_from_file(f) + + hops = msg.get_all('received') + + # in reverse order, get the server(s) and date/time involved + hops.reverse() + results = [] + for hop in hops: + hop = hop.lower() + + if hop.startswith('by'): # 'Received: by' line + sender = "start" + receiver = hop[3:hop.find(' ',3)] + date = extract_date(hop) + + else: # 'Received: from' line + sender = hop[5:hop.find(' ',5)] + by = hop.find('by ')+3 + receiver = hop[by:hop.find(' ', by)] + date = extract_date(hop) + + results.append((sender, receiver, date)) + output(results) + +def output(results): + print "Sender, Recipient, Time, Delta" + print + previous_dt = delta = 0 + for (sender, receiver, date) in results: + if previous_dt: + delta = date - previous_dt + + print "%s, %s, %s, %s" % (sender, + receiver, + date.strftime("%Y/%d/%m %H:%M:%S"), + delta) + print + previous_dt = date + +def main(): + # Perform some basic argument checking + if len(sys.argv) != 2: + print "Usage: mailhop.py FILENAME" + + else: + filename = sys.argv[1] + if os.path.isfile(filename): + process(filename) + else: + print filename, "doesn't seem to be a valid file." 
+ +if __name__ == '__main__': + main() + + +# @@PLEAC@@_4.0 +#----------------------------- +# Python does not automatically flatten lists, in other words +# in the following, non-nested contains four elements and +# nested contains three elements, the third element of which +# is itself a list containing two elements: +non_nested = ["this", "that", "the", "other"] +nested = ["this", "that", ["the", "other"]] +#----------------------------- +tune = ["The", "Star-Spangled", "Banner"] +#----------------------------- + +# @@PLEAC@@_4.1 +#----------------------------- +a = ["quick", "brown", "fox"] +a = "Why are you teasing me?".split() + +text = """ + The boy stood on the burning deck, + It was as hot as glass. +""" +lines = [line.lstrip() for line in text.strip().split("\n")] +#----------------------------- +biglist = [line.rstrip() for line in open("mydatafile")] +#----------------------------- +banner = "The Mines of Moria" +banner = 'The Mines of Moria' +#----------------------------- +name = "Gandalf" +banner = "Speak, " + name + ", and enter!" +banner = "Speak, %s, and welcome!" 
% name +#----------------------------- +his_host = "www.python.org" +import os +host_info = os.popen("nslookup " + his_host).read() + +# NOTE: not really relevant to Python (no magic '$$' variable) +python_info = os.popen("ps %d" % os.getpid()).read() +shell_info = os.popen("ps $$").read() +#----------------------------- +# NOTE: not really relevant to Python (no automatic interpolation) +banner = ["Costs", "only", "$4.95"] +banner = "Costs only $4.95".split() +#----------------------------- +brax = """ ' " ( ) < > { } [ ] """.split() #""" +brax = list("""'"()<>{}[]""") #""" +rings = '''They're "Nenya Narya Vilya"'''.split() #''' +tags = 'LI TABLE TR TD A IMG H1 P'.split() +sample = r'The backslash (\) is often used in regular expressions.'.split() + +#----------------------------- +banner = "The backslash (\\) is often used in regular expressions.".split() +#----------------------------- +ships = u"Niña Pinta Santa MarÃa".split() # WRONG (only three ships) +ships = [u"Niña", u"Pinta", u"Santa MarÃa"] # right +#----------------------------- + +# @@PLEAC@@_4.2 +#----------------------------- +def commify_series(args): + n = len(args) + if n == 0: + return "" + elif n == 1: + return args[0] + elif n == 2: + return args[0] + " and " + args[1] + return ", ".join(args[:-1]) + ", and " + args[-1] + +commify_series([]) +commify_series(["red"]) +commify_series(["red", "yellow"]) +commify_series(["red", "yellow", "green"]) +#----------------------------- +mylist = ["red", "yellow", "green"] +print "I have", mylist, "marbles." +print "I have", " ".join(mylist), "marbles." +#=> I have ['red', 'yellow', 'green'] marbles. +#=> I have red yellow green marbles. 

#-----------------------------
#!/usr/bin/env python
# commify_series - show proper comma insertion in list output
data = (
    ( 'just one thing', ),
    ( 'Mutt Jeff'.split() ),
    ( 'Peter Paul Mary'.split() ),
    ( 'To our parents', 'Mother Theresa', 'God' ),
    ( 'pastrami', 'ham and cheese', 'peanut butter and jelly', 'tuna' ),
    ( 'recycle tired, old phrases', 'ponder big, happy thoughts' ),
    ( 'recycle tired, old phrases',
      'ponder big, happy thoughts',
      'sleep and dream peacefully' ),
    )

def commify_series(terms):
    """Return terms joined as an English series.

    Items are separated by ", " unless one of them already contains a
    comma, in which case "; " is used; the last item is introduced by
    "and".  Zero, one and two items are special-cased.
    """
    # pick the separator: stop looking at the first term holding a comma
    separator = ", "
    for candidate in terms:
        if "," in candidate:
            separator = "; "
            break

    count = len(terms)
    if count == 0:
        return ""
    if count == 1:
        return terms[0]
    if count == 2:
        return " and ".join(terms)

    leading = separator.join(terms[:-1])
    return "%s%sand %s" % (leading, separator, terms[-1])

for series in data:
    print("The list is: %s." % commify_series(series))

#=> The list is: just one thing.
#=> The list is: Mutt and Jeff.
#=> The list is: Peter, Paul, and Mary.
#=> The list is: To our parents, Mother Theresa, and God.
#=> The list is: pastrami, ham and cheese, peanut butter and jelly, and tuna.
#=> The list is: recycle tired, old phrases and ponder big, happy thoughts.
#=> The list is: recycle tired, old phrases; ponder big, happy thoughts; and
#   sleep and dream peacefully.
#-----------------------------

# @@PLEAC@@_4.3
#-----------------------------
# Python allocates more space than is necessary every time a list needs to
# grow and only shrinks lists when more than half the available space is
# unused.  This means that adding or removing an element will in most cases
# not force a reallocation.
+ +del mylist[size:] # shrink mylist +mylist += [None] * size # grow mylist by appending 'size' None elements + +# To add an element to the end of a list, use the append method: +mylist.append(4) + +# To insert an element, use the insert method: +mylist.insert(0, 10) # Insert 10 at the beginning of the list + +# To extend one list with the contents of another, use the extend method: +list2 = [1,2,3] +mylist.extend(list2) + +# To insert the contents of one list into another, overwriting zero or +# more elements, specify a slice: +mylist[1:1] = list2 # Don't overwrite anything; grow mylist if needed +mylist[2:3] = list2 # Overwrite mylist[2] and grow mylist if needed + +# To remove one element from the middle of a list: +# To remove elements from the middle of a list: +del mylist[idx1:idx2] # 0 or more +x = mylist.pop(idx) # remove mylist[idx] and assign it to x + +# You cannot assign to or get a non-existent element: +# >>> x = [] +# >>> x[4] = 5 +# +# Traceback (most recent call last): +# File "<pyshell#1>", line 1, in -toplevel- +# x[4] = 5 +# IndexError: list assignment index out of range +# +# >>> print x[1000] +# +# Traceback (most recent call last): +# File "<pyshell#16>", line 1, in -toplevel- +# print x[1000] +# IndexError: list index out of range +#----------------------------- +def what_about_that_list(terms): + print "The list now has", len(terms), "elements." + print "The index of the last element is", len(terms)-1, "(or -1)." + print "Element #3 is %s." % terms[3] + +people = "Crosby Stills Nash Young".split() +what_about_that_list(people) +#----------------------------- +#=> The list now has 4 elements. +#=> The index of the last element is 3 (or -1). +#=> Element #3 is Young. 
#-----------------------------
# pop() with no argument removes (and returns) the last element
people.pop()
what_about_that_list(people)
#-----------------------------
# pad the list with None up to 10000 elements
people += [None] * (10000 - len(people))
#-----------------------------
#>>> people += [None] * (10000 - len(people))
#>>> what_about_that_list(people)
#The list now has 10000 elements.
#The index of the last element is 9999 (or -1).
#Element #3 is None.
#-----------------------------

# @@PLEAC@@_4.4
#-----------------------------
for item in mylist:
    pass # do something with item
#-----------------------------
for user in bad_users:
    complain(user)
#-----------------------------
import os
# sorted() on the (key, value) pairs orders the output by variable name
for (key, val) in sorted(os.environ.items()):
    print "%s=%s" % (key, val)
#-----------------------------
for user in all_users:
    disk_space = get_usage(user)     # find out how much disk space in use
    if disk_space > MAX_QUOTA:       # if it's more than we want ...
        complain(user)               # ... then object vociferously
#-----------------------------
import os
for line in os.popen("who"):
    if "dalke" in line:
        print line,  # or print line[:-1]

# or:
print "".join([line for line in os.popen("who")
               if "dalke" in line]),

#-----------------------------
for line in myfile:
    for word in line.split(): # Split on whitespace
        print word[::-1],     # reverse word
    print

# pre 2.3:
for line in myfile:
    for word in line.split(): # Split on whitespace
        chars = list(word)    # Turn the string into a list of characters
        chars.reverse()
        print "".join(chars),
    print
#-----------------------------
for item in mylist:
    print "i =", item
#-----------------------------
# NOTE: you can't modify in place the way Perl does:
# data = [1, 2, 3]
# for elem in data:
#     elem -= 1
#print data
#=>[1, 2, 3]

# build a new list and rebind the name ...
data = [1, 2, 3]
data = [i-1 for i in data]
print data
#=>[0, 1, 2]

# or
# ... or assign back through the index to change the list in place
for i, elem in enumerate(data):
    data[i] = elem - 1
#-----------------------------
# NOTE: strings are immutable in Python so this doesn't translate well.
# strip() returns a new string, so the result has to be assigned back
s = s.strip()
data = [s.strip() for s in data]
for k, v in mydict.items():
    mydict[k] = v.strip()
#-----------------------------

# @@PLEAC@@_4.5
#-----------------------------
fruits = ["Apple", "Blackberry"]
for fruit in fruits:
    print fruit, "tastes good in a pie."
#=> Apple tastes good in a pie.
#=> Blackberry tastes good in a pie.
#-----------------------------
# DON'T DO THIS:
for i in range(len(fruits)):
    print fruits[i], "tastes good in a pie."

# If you must explicitly index, use enumerate():
for i, fruit in enumerate(fruits):
    print "%s) %s tastes good in a pie."%(i+1, fruit)
#-----------------------------
rogue_cats = ["Morris", "Felix"]
# the dictionary value is the same list object as rogue_cats, not a copy
namedict = { "felines": rogue_cats }
for cat in namedict["felines"]:
    print cat, "purrs hypnotically."
print "--More--\nYou are controlled."
#-----------------------------
# As noted before, if you need an index, use enumerate() and not this:
for i in range(len(namedict["felines"])):
    print namedict["felines"][i], "purrs hypnotically."
+#----------------------------- + +# @@PLEAC@@_4.6 +#----------------------------- +uniq = list(set(mylist)) +#----------------------------- +# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259174 +# for a more heavyweight version of a bag +seen = {} +for item in mylist: + seen[item] = seen.get(item, 0) + 1 + +uniq = seen.keys() +#----------------------------- +seen = {} +uniq = [] +for item in mylist: + count = seen.get(item, 0) + if count == 0: + uniq.append(item) + seen[item] = count + 1 +#----------------------------- +# generate a list of users logged in, removing duplicates +import os +usernames = [line.split()[0] for line in os.popen("who")] +uniq = sorted(set(usernames)) +print "users logged in:", " ".join(uniq) + +# DON'T DO THIS: +import os +ucnt = {} +for line in os.popen("who"): + username = line.split()[0] # Get the first word + ucnt[username] = ucnt.get(username, 0) + 1 # record the users' presence + +# extract and print unique keys +users = ucnt.keys() +users.sort() +print "users logged in:", " ".join(users) +#----------------------------- + +# @@PLEAC@@_4.7 +#----------------------------- +# assume a_list and b_list are already loaded +aonly = [item for item in a_list if item not in b_list] + +# A slightly more complex Pythonic version using sets - if you had a few +# lists, subtracting sets would be clearer than the listcomp version above +a_set = set(a_list) +b_set = set(b_list) +aonly = list(a_set - b_set) # Elements in a_set but not in b_set + +# DON'T DO THIS. +seen = {} # lookup table to test membership of B +aonly = [] # answer + +# build lookup table +for item in b_list: + seen[item] = 1 + +# find only elements in a_list and not in b_list +for item in a_list: + if not item not in seen: + # it's not in 'seen', so add to 'aonly' + aonly.append(item) +#----------------------------- +# DON'T DO THIS. There's lots of ways not to do it. 
+seen = {} # lookup table +aonly = [] # answer + +# build lookup table - unnecessary and poor Python style +[seen.update({x: 1}) for x in b_list] + +aonly = [item for item in a_list if item not in seen] + +#----------------------------- +aonly = list(set(a_list)) + +# DON'T DO THIS. +seen = {} +aonly = [] +for item in a_list: + if item not in seen: + aonly.append(item) + seen[item] = 1 # mark as seen +#----------------------------- +mydict["key1"] = 1 +mydict["key2"] = 2 +#----------------------------- +mydict[("key1", "key2")] = (1,2) +#----------------------------- +# DON'T DO THIS: +seen = dict.fromkeys(B.keys()) + +# DON'T DO THIS pre-2.3: +seen = {} +for term in B: + seen[term] = None +#----------------------------- +# DON'T DO THIS: +seen = {} +for k, v in B: + seen[k] = 1 +#----------------------------- + +# @@PLEAC@@_4.8 +#----------------------------- +a = (1, 3, 5, 6, 7, 8) +b = (2, 3, 5, 7, 9) + +a_set = set(a) +b_set = set(b) + +union = a_set | b_set # or a_set.union(b_set) +isect = a_set & b_set # or a_set.intersection(b_set) +diff = a_set ^ b_set # or a_set.symmetric_difference(b_set) + + +# DON'T DO THIS: +union_list = []; isect_list = []; diff = [] +union_dict = {}; isect_dict = {} +count = {} +#----------------------------- +# DON'T DO THIS: +for e in a: + union_dict[e] = 1 + +for e in b: + if union_dict.has_key(e): + isect_dict[e] = 1 + union_dict[e] = 1 + +union_list = union_dict.keys() +isect_list = isect_dict.keys() +#----------------------------- +# DON'T DO THIS: +for e in a + b: + if union.get(e, 0) == 0: + isect[e] = 1 + union[e] = 1 + +union = union.keys() +isect = isect.keys() +#----------------------------- +# DON'T DO THIS: +count = {} +for e in a + b: + count[e] = count.get(e, 0) + 1 + +union = []; isect = []; diff = [] + +for e in count.keys(): + union.append(e) + if count[e] == 2: + isect.append(e) + else: + diff.append(e) +#----------------------------- +# DON'T DO THIS: +isect = []; diff = []; union = [] +count = {} +for e in a + 
b: + count[e] = count.get(e, 0) + 1 + +for e, num in count.items(): + union.append(e) + [None, diff, isect][num].append(e) +#----------------------------- + +# @@PLEAC@@_4.9 +#----------------------------- +# "append" for a single term and +# "extend" for many terms +mylist1.extend(mylist2) +#----------------------------- +mylist1 = mylist1 + mylist2 +mylist1 += mylist2 +#----------------------------- +members = ["Time", "Flies"] +initiates = ["An", "Arrow"] +members.extend(initiates) +# members is now ["Time", "Flies", "An", "Arrow"] +#----------------------------- +members[2:] = ["Like"] + initiates +print " ".join(members) +members[:1] = ["Fruit"] # or members[1] = "Fruit" +members[-2:] = ["A", "Banana"] +print " ".join(members) +#----------------------------- +#=> Time Flies Like An Arrow +#=> Fruit Flies Like A Banana +#----------------------------- + +# @@PLEAC@@_4.10 +#----------------------------- +# reverse mylist into revlist + +revlist = mylist[::-1] + +# or +revlist = list(reversed(mylist)) + +# or pre-2.3 +revlist = mylist[:] # shallow copy +revlist.reverse() +#----------------------------- +for elem in reversed(mylist): + pass # do something with elem + +# or +for elem in mylist[::-1]: + pass # do something with elem + +# if you need the index and the list won't take too much memory: +for i, elem in reversed(list(enumerate(mylist))): + pass + +# If you absolutely must explicitly index: +for i in range(len(mylist)-1, -1, -1): + pass +#----------------------------- +descending = sorted(users, reverse=True) +#----------------------------- + +# @@PLEAC@@_4.11 +#----------------------------- +# remove n elements from the front of mylist +mylist[:n] = [] # or del mylist[:n] + +# remove n elements from front of mylist, saving them into front +front, mylist[:n] = mylist[:n], [] + +# remove 1 element from the front of mylist, saving it in front: +front = mylist.pop(0) + +# remove n elements from the end of mylist +mylist[-n:] = [] # or del mylist[-n:] + +# 
remove n elements from the end of mylist, saving them in end +end, mylist[-n:] = mylist[-n:], [] + +# remove 1 element from the end of mylist, saving it in end: +end = mylist.pop() + +#----------------------------- +def shift2(terms): + front = terms[:2] + terms[:2] = [] + return front + +def pop2(terms): + back = terms[-2:] + terms[-2:] = [] + return back +#----------------------------- +friends = "Peter Paul Mary Jim Tim".split() +this, that = shift2(friends) +# 'this' contains Peter, 'that' has Paul, and +# 'friends' has Mary, Jim, and Tim + +beverages = "Dew Jolt Cola Sprite Fresca".split() +pair = pop2(beverages) +# pair[0] contains Sprite, pair[1] has Fresca, +# and 'beverages' has (Dew, Jolt, Cola) + +# In general you probably shouldn't do things that way because it's +# not clear from these calls that the lists are modified. +#----------------------------- + +# @@PLEAC@@_4.12 +for item in mylist: + if criterion: + pass # do something with matched item + break +else: + pass # unfound +#----------------------------- +for idx, elem in enumerate(mylist): + if criterion: + pass # do something with elem found at mylist[idx] + break +else: + pass ## unfound +#----------------------------- +# Assuming employees are sorted high->low by wage. 
+for employee in employees: + if employee.category == 'engineer': + highest_engineer = employee + break + +print "Highest paid engineer is:", highest_engineer.name +#----------------------------- +# If you need the index, use enumerate: +for i, employee in enumerate(employees): + if employee.category == 'engineer': + highest_engineer = employee + break +print "Highest paid engineer is: #%s - %s" % (i, highest_engineer.name) + + +# The following is rarely appropriate: +for i in range(len(mylist)): + if criterion: + pass # do something + break +else: + pass ## not found +#----------------------------- + + +# @@PLEAC@@_4.13 +matching = [term for term in mylist if test(term)] +#----------------------------- +matching = [] +for term in mylist: + if test(term): + matching.append(term) +#----------------------------- +bigs = [num for num in nums if num > 1000000] +pigs = [user for (user, val) in users.items() if val > 1e7] +#----------------------------- +import os +matching = [line for line in os.popen("who") + if line.startswith("gnat ")] +#----------------------------- +engineers = [employee for employee in employees + if employee.position == "Engineer"] +#----------------------------- +secondary_assistance = [applicant for applicant in applicants + if 26000 <= applicant.income < 30000] +#----------------------------- + +# @@PLEAC@@_4.14 +sorted_list = sorted(unsorted_list) +#----------------------------- +# pids is an unsorted list of process IDs +import os, signal, time +for pid in sorted(pids): + print pid + +pid = raw_input("Select a process ID to kill: ") +try: + pid = int(pid) +except ValueError: + raise SystemExit("Exiting ... ") +os.kill(pid, signal.SIGTERM) +time.sleep(2) +try: + os.kill(pid, signal.SIGKILL) +except OSError, err: + if err.errno != 3: # was it already killed? 
+ raise +#----------------------------- +descending = sorted(unsorted_list, reverse=True) +#----------------------------- +allnums = [4, 19, 8, 3] +allnums.sort(reverse=True) # inplace +#----------------------------- +# pre 2.3 +allnums.sort() # inplace +allnums.reverse() # inplace +#or +allnums = sorted(allnums, reverse=True) # reallocating +#----------------------------- + +# @@PLEAC@@_4.15 +ordered = sorted(unordered, cmp=compare) +#----------------------------- +ordered = sorted(unordered, key=compute) + +# ...which is somewhat equivalent to: +precomputed = [(compute(x), x) for x in unordered] +precomputed.sort(lambda a, b: cmp(a[0], b[0])) +ordered = [v for k,v in precomputed.items()] +#----------------------------- +# DON'T DO THIS. +def functional_sort(mylist, function): + mylist.sort(function) + return mylist + +ordered = [v for k,v in functional_sort([(compute(x), x) for x in unordered], + lambda a, b: cmp(a[0], b[0]))] +#----------------------------- +ordered = sorted(employees, key=lambda x: x.name) +#----------------------------- +for employee in sorted(employees, key=lambda x: x.name): + print "%s earns $%s" % (employee.name, employee.salary) +#----------------------------- +sorted_employees = sorted(employees, key=lambda x: x.name): +for employee in sorted_employees: + print "%s earns $%s" % (employee.name, employee.salary) + +# load bonus +for employee in sorted_employees: + if bonus(employee.ssn): + print employee.name, "got a bonus!" +#----------------------------- +sorted_employees = sorted(employees, key=lambda x: (x.name, x.age)): +#----------------------------- +# NOTE: Python should allow access to the pwd fields by name +# as well as by position. 
+import pwd +# fetch all users +users = pwd.getpwall() +for user in sorted(users, key=lambda x: x[0]): + print user[0] +#----------------------------- +sorted_list = sorted(names, key=lambda x: x[:1]) +#----------------------------- +sorted_list = sorted(strings, key=len) +#----------------------------- +# DON'T DO THIS. +temp = [(len(s), s) for s in strings] +temp.sort(lambda a, b: cmp(a[0], b[0])) +sorted_list = [x[1] for x in temp] +#----------------------------- +# DON'T DO THIS. +def functional_sort(mylist, function): + mylist.sort(function) + return mylist + +sorted_fields = [v for k,v in functional_sort( + [(int(re.search(r"(\d+)", x).group(1)), x) for x in fields], + lambda a, b: cmp(a[0], b[0]))] +#----------------------------- +entries = [line[:-1].split() for line in open("/etc/passwd")] + +for entry in sorted(entries, key=lambda x: (x[3], x[2], x[0])): + print entry +#----------------------------- + +# @@PLEAC@@_4.16 +#----------------------------- +import itertools +for process in itertools.cycle([1, 2, 3, 4, 5]): + print "Handling process", process + time.sleep(1) + +# pre 2.3: +import time +class Circular(object): + def __init__(self, data): + assert len(data) >= 1, "Cannot use an empty list" + self.data = data + + def __iter__(self): + while True: + for elem in self.data: + yield elem + +circular = Circular([1, 2, 3, 4, 5]) + +for process in circular: + print "Handling process", process + time.sleep(1) + +# DON'T DO THIS. All those pops and appends mean that the list needs to be +# constantly reallocated. 
This is rather bad if your list is large: +import time +class Circular(object): + def __init__(self, data): + assert len(data) >= 1, "Cannot use an empty list" + self.data = data + + def next(self): + head = self.data.pop(0) + self.data.append(head) + return head + +circular = Circular([1, 2, 3, 4, 5]) +while True: + process = circular.next() + print "Handling process", process + time.sleep(1) +#----------------------------- + +# @@PLEAC@@_4.17 +#----------------------------- +# generate a random permutation of mylist in place +import random +random.shuffle(mylist) +#----------------------------- + +# @@PLEAC@@_4.18 +#----------------------------- +import sys + +def make_columns(mylist, screen_width=78): + if mylist: + maxlen = max([len(elem) for elem in mylist]) + maxlen += 1 # to make extra space + + cols = max(1, screen_width/maxlen) + rows = 1 + len(mylist)/cols + + # pre-create mask for faster computation + mask = "%%-%ds " % (maxlen-1) + + for n in range(rows): + row = [mask%elem + for elem in mylist[n::rows]] + yield "".join(row).rstrip() + +for row in make_columns(sys.stdin.readlines(), screen_width=50): + print row + + +# A more literal translation +import sys + +# subroutine to check whether at last item on line +def EOL(item): + return (item+1) % cols == 0 + +# Might not be portable to non-linux systems +def getwinsize(): + # Use the curses module if installed + try: + import curses + stdscr = curses.initscr() + rows, cols = stdscr.getmaxyx() + return cols + except ImportError: + pass + + # Nope, so deal with ioctl directly. What value for TIOCGWINSZ? 
+ try: + import termios + TIOCGWINSZ = termios.TIOCGWINSZ + except ImportError: + TIOCGWINSZ = 0x40087468 # This is Linux specific + + import struct, fcntl + s = struct.pack("HHHH", 0, 0, 0, 0) + try: + x = fcntl.ioctl(sys.stdout.fileno(), TIOCGWINSZ, s) + except IOError: + return 80 + rows, cols = struct.unpack("HHHH", x)[:2] + return cols + +cols = getwinsize() + +data = [s.rstrip() for s in sys.stdin.readlines()] +if not data: + maxlen = 1 +else: + maxlen = max(map(len, data)) + +maxlen += 1 # to make extra space + +# determine boundaries of screen +cols = (cols / maxlen) or 1 +rows = (len(data)+cols) / cols + +# pre-create mask for faster computation +mask = "%%-%ds " % (maxlen-1) + +# now process each item, picking out proper piece for this position +for item in range(rows * cols): + target = (item % cols) * rows + (item/cols) + if target < len(data): + piece = mask % data[target] + else: + piece = mask % "" + if EOL(item): + piece = piece.rstrip() # don't blank-pad to EOL + sys.stdout.write(piece) + if EOL(item): + sys.stdout.write("\n") + +if EOL(item): + sys.stdout.write("\n") +#----------------------------- + +# @@PLEAC@@_4.19 +#----------------------------- +def factorial(n): + s = 1 + while n: + s *= n + n -= 1 + return s +#----------------------------- +def permute(alist, blist=[]): + if not alist: + yield blist + for i, elem in enumerate(alist): + for elem in permute(alist[:i] + alist[i+1:], blist + [elem]): + yield elem + +for permutation in permute(range(4)): + print permutation +#----------------------------- +# DON'T DO THIS +import fileinput + +# Slightly modified from +# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66463 +def print_list(alist, blist=[]): + if not alist: + print ' '.join(blist) + for i in range(len(alist)): + blist.append(alist.pop(i)) + print_list(alist, blist) + alist.insert(i, blist.pop()) + +for line in fileinput.input(): + words = line.split() + print_list(words) +#----------------------------- +class 
FactorialMemo(list): + def __init__(self): + self.append(1) + + def __call__(self, n): + try: + return self[n] + except IndexError: + ret = n * self(n-1) + self.append(ret) + return ret + +factorial = FactorialMemo() + +import sys +import time +sys.setrecursionlimit(10000) + +start = time.time() +factorial(2000) +f1 = time.time() - start +factorial(2100) # First 2000 values are cached already +f2 = time.time() - f1 - start +print "Slow first time:", f1 +print "Quicker the second time:", f2 +#----------------------------- + +class MemoizedPermutations(list): + def __init__(self, alist): + self.permute(alist, []) + + def permute(self, alist, blist): + if not alist: + self.append(blist) + for i, elem in enumerate(alist): + self.permute(alist[:i] + alist[i+1:], blist + [elem]) + + def __call__(self, seq, idx): + return [seq[n] for n in self[idx]] + + +p5 = MemoizedPermutations(range(5)) + +words = "This sentence has five words".split() +print p5(words, 17) +print p5(words, 81) +#----------------------------- + +# @@PLEAC@@_5.0 +#----------------------------- +# dictionaries +age = {"Nat": 24, + "Jules": 24, + "Josh": 17} +#----------------------------- +age = {} +age["Nat"] = 24 +age["Jules"] = 25 +age["Josh"] = 17 +#----------------------------- +food_color = {"Apple": "red", + "Banana": "yellow", + "Lemon": "yellow", + "Carrot": "orange" + } +#----------------------------- +# NOTE: keys must be quoted in Python + + +# @@PLEAC@@_5.1 +mydict[key] = value +#----------------------------- +# food_color defined per the introduction +food_color["Raspberry"] = "pink" +print "Known foods:" +for food in food_color: + print food + +#=> Known foods: +#=> Raspberry +#=> Carrot +#=> Lemon +#=> Apple +#=> Banana +#----------------------------- + +# @@PLEAC@@_5.2 +# does mydict have a value for key? 
+if key in mydict: + pass # it exists +else: + pass # it doesn't + +#----------------------------- +# food_color per the introduction +for name in ("Banana", "Martini"): + if name in food_color: + print name, "is a food." + else: + print name, "is a drink." + +#=> Banana is a food. +#=> Martini is a drink. +#----------------------------- +age = {} +age["Toddler"] = 3 +age["Unborn"] = 0 +age["Phantasm"] = None + +for thing in ("Toddler", "Unborn", "Phantasm", "Relic"): + print ("%s:"%thing), + if thing in age: + print "Exists", + if age[thing] is not None: + print "Defined", + if age[thing]: + print "True", + print +#=> Toddler: Exists Defined True +#=> Unborn: Exists Defined +#=> Phantasm: Exists +#=> Relic: +#----------------------------- +# Get file sizes for the requested filenames +import fileinput, os +size = {} +for line in fileinput.input(): + filename = line.rstrip() + if filename in size: + continue + size[filename] = os.path.getsize(filename) + + +# @@PLEAC@@_5.3 +# remove key and its value from mydict +del mydict[key] +#----------------------------- +# food_color as per Introduction +def print_foods(): + foods = food_color.keys() + + print "Keys:", " ".join(foods) + print "Values:", + + for food in foods: + color = food_color[food] + if color is not None: + print color, + else: + print "(undef)", + print + +print "Initially:" +print_foods() + +print "\nWith Banana set to None" +food_color["Banana"] = None +print_foods() + +print "\nWith Banana deleted" +del food_color["Banana"] +print_foods() + +#=> Initially: +#=> Keys: Carrot Lemon Apple Banana +#=> Values: orange yellow red yellow +#=> +#=> With Banana set to None +#=> Keys: Carrot Lemon Apple Banana +#=> Values: orange yellow red (undef) +#=> +#=> With Banana deleted +#=> Keys: Carrot Lemon Apple +#=> Values: orange yellow red +#----------------------------- +for key in ["Banana", "Apple", "Cabbage"]: + del food_color[key] +#----------------------------- + + +# @@PLEAC@@_5.4 
+#----------------------------- +for key, value in mydict.items(): + pass # do something with key and value + +# If mydict is large, use iteritems() instead +for key, value in mydict.iteritems(): + pass # do something with key and value + +#----------------------------- +# DON'T DO THIS: +for key in mydict.keys(): + value = mydict[key] + # do something with key and value +#----------------------------- +# food_color per the introduction +for food, color in food_color.items(): + print "%s is %s." % (food, color) + +# DON'T DO THIS: +for food in food_color: + color = food_color[food] + print "%s is %s." % (food, color) + +#----------------------------- +print """%(food)s + +is + +%(color)s. +""" % vars() +#----------------------------- +for food, color in sorted(food_color.items()): + print "%s is %s." % (food, color) + +#----------------------------- +#!/usr/bin/env python +# countfrom - count number of messages from each sender + +import sys +if len(sys.argv) > 1: + infile = open(sys.argv[1]) +else: + infile = sys.stdin + +counts = {} +for line in infile: + if line.startswith("From: "): + name = line[6:-1] + counts[name] = counts.get(name, 0) + 1 + +for (name, count) in sorted(counts.items()): + print "%s: %s" % (name, count) + +#----------------------------- + + +# @@PLEAC@@_5.5 +for key, val in mydict.items(): + print key, "=>", val +#----------------------------- +print "\n".join([("%s => %s" % item) for item in mydict.items()]) +#----------------------------- +print mydict +#=> {'firstname': 'Andrew', 'login': 'dalke', 'state': 'New Mexico', 'lastname': 'Dalke'} +#----------------------------- +import pprint +pprint.pprint(dict) +#=> {'firstname': 'Andrew', +#=> 'lastname': 'Dalke', +#=> 'login': 'dalke', +#=> 'state': 'New Mexico'} +#----------------------------- + + +# @@PLEAC@@_5.6 +#----------------------------- +class SequenceDict(dict): + """ + Dictionary that remembers the insertion order. 
+ The lists returned by keys(), values() and items() are + in the insertion order. + """ + def __init__(self, *args): + self._keys={} # key --> id + self._ids={} # id --> key + self._next_id=0 + + def __setitem__(self, key, value): + self._keys[key]=self._next_id + self._ids[self._next_id]=key + self._next_id+=1 + return dict.__setitem__(self, key, value) + + def __delitem__(self, key): + id=self._keys[key] + del(self._keys[key]) + del(self._ids[id]) + return dict.__delitem__(self, key) + + def values(self): + values=[] + ids=list(self._ids.items()) + ids.sort() + for id, key in ids: + values.append(self[key]) + return values + + def items(self): + items=[] + ids=list(self._ids.items()) + ids.sort() + for id, key in ids: + items.append((key, self[key])) + return items + + def keys(self): + ids=list(self._ids.items()) + ids.sort() + keys=[] + for id, key in ids: + keys.append(key) + return keys + + def update(self, d): + for key, value in d.items(): + self[key]=value + + def clear(self): + dict.clear(self) + self._keys={} + self._ids={} + self._next_id=0 + +def testSequenceDict(): + sd=SequenceDict() + + # First Test + sd[3]="first" + sd[2]="second" + sd[1]="third" + print sd.keys() + print sd.items() + print sd.values() + + del(sd[1]) + del(sd[2]) + del(sd[3]) + + print sd.keys(), sd.items(), sd.values() + print sd._ids, sd._keys + + print "---------------" + # Second Test + sd["b"]="first" + sd["a"]="second" + sd.update({"c": "third"}) + print sd.keys() + print sd.items() + print sd.values() + + del(sd["b"]) + del(sd["a"]) + del(sd["c"]) + + print sd.keys(), sd.items(), sd.values() + print sd._ids, sd._keys + +def likePerlCookbook(): + food_color=SequenceDict() + food_color["Banana"]="Yellow"; + food_color["Apple"]="Green"; + food_color["Lemon"]="Yellow" + print "In insertion order, the foods' color are:" + for food, color in food_color.items(): + print "%s is colored %s" % (food, color) + +if __name__=="__main__": + #testSequenceDict() + likePerlCookbook() + + +# 
@@PLEAC@@_5.7 +import os +ttys = {} + +who = os.popen("who") + +for line in who: + user, tty = line.split()[:2] + ttys.setdefault(user, []).append(tty) + +for (user, tty_list) in sorted(ttys.items()): + print user + ": " + " ".join(tty_list) +#----------------------------- +import pwd +for (user, tty_list) in ttys.items(): + print user + ":", len(tty_list), "ttys." + for tty in sorted(tty_list): + try: + uid = os.stat("/dev/" + tty).st_uid + user = pwd.getpwuid(uid)[0] + except os.error: + user = "(not available)" + print "\t%s (owned by %s)" % (tty, user) + +# @@PLEAC@@_5.8 +# lookup_dict maps keys to values +reverse = dict([(val, key) for (key, val) in lookup_dict.items()]) +#----------------------------- +surname = {"Mickey": "Mantle", "Babe": "Ruth"} +first_name = dict([(last, first) for (first, last) in surname.items()]) + +print first_name["Mantle"] +#=> Mickey +#----------------------------- +#!/usr/bin/perl -w +# foodfind - find match for food or color + +import sys +if not sys.argv[1:]: + raise SystemExit("usage: foodfind food_or_color") +given = sys.argv[1] + +color_dict = {"Apple": "red", + "Banana": "yellow", + "Lemon": "yellow", + "Carrot": "orange", + } +food_dict = dict([(color, food) for (food, color) in color_dict.items()]) + +if given in color_dict: + print given, "is a food with color", color_dict[given] +elif given in food_dict: + print food_dict[given], "is a food with color", given +#----------------------------- +# food_color as per the introduction +foods_with_color = {} +for food, color in food_color.items(): + foods_with_color.setdefault(color, []).append(food) + +print " ".join(foods_with_color["yellow"]), "were yellow foods." 
+#----------------------------- + +# @@PLEAC@@_5.9 +#----------------------------- +# mydict is the hash to sort +for key, value in sorted(mydict.items()): + # do something with key, value +#----------------------------- +# food_color as per section 5.8 +for food, color in sorted(food_color.items()): + print "%s is %s." % (food, color) +#----------------------------- +# NOTE: alternative version +for item in sorted(food_color.items()): + print "%s is %s." % item +#----------------------------- +# NOTE: alternative version showing a user-defined function +def food_cmp(x, y): + return cmp(x, y) + +for food, color in sorted(food_color, cmp=food_cmp): + print "%s is %s." % (food, color) +#----------------------------- +def food_len_cmp(x, y): + return cmp(len(x), len(y)) + +for food in sorted(food_color, cmp=food_len_cmp): + print "%s is %s." % (food, food_color[food]) + +# In this instance, however, the following is both simpler and faster: +for food in sorted(food_color, key=len): + print "%s is %s." 
% (food, food_color[food]) +#----------------------------- + + +# @@PLEAC@@_5.10 +#----------------------------- +merged = {} +merged.update(a_dict) +merged.update(b_dict) + +#----------------------------- +# NOTE: alternative version +merged = a_dict.copy() +merged.update(b_dict) +#----------------------------- +# DON'T DO THIS: + +merged = {} +for k, v in a_dict.items(): + merged[k] = v +for k, v in b_dict.items(): + merged[k] = v +#----------------------------- +# food_color as per section 5.8 +drink_color = {"Galliano": "yellow", + "Mai Tai": "blue"} + +ingested_color = drink_color.copy() +ingested_color.update(food_color) +#----------------------------- +# DON'T DO THIS: +drink_color = {"Galliano": "yellow", + "Mai Tai": "blue"} + +substance_color = {} +for k, v in food_color.items(): + substance_color[k] = v +for k, v in drink_color.items(): + substance_color[k] = v +#----------------------------- +# DON'T DO THIS: +substance_color = {} +for mydict in (food_color, drink_color): + for k, v in mydict: + substance_color[k] = v +#----------------------------- +# DON'T DO THIS: +substance_color = {} +for item in food_color.items() + drink_color.items(): + for k, v in mydict: + substance_color[k] = v +#----------------------------- +substance_color = {} +for mydict in (food_color, drink_color): + for k, v in mydict.items(): + if substance_color.has_key(k): + print "Warning:", k, "seen twice. Using the first definition." + continue + substance_color[k] = v + +# I think it's a copy, in which case +all_colors = new_colors.copy() + + +# @@PLEAC@@_5.11 +common = [k for k in dict1 if k in dict2] +#----------------------------- +this_not_that = [k for k in dict1 if k not in dict2] +#----------------------------- +# citrus_color is a dict mapping citrus food name to its color. 
+citrus_color = {"Lemon": "yellow", + "Orange": "orange", + "Lime": "green"} + +# build up a list of non-citrus foods +non_citrus = [k for k in food_color if k not in citruscolor] +#----------------------------- + +# @@PLEAC@@_5.12 +#----------------------------- +# references as keys of dictionaries is no pb in python + +name = {} +for filename in ("/etc/termcap", "/vmunix", "/bin/cat"): + try: + myfile = open(filename) + except IOError: + pass + else: + names[myfile] = filename + +print "open files:", ", ".join(name.values()) +for f, fname in name.items(): + f.seek(0, 2) # seek to the end + print "%s is %d bytes long." % (fname, f.tell()) +#----------------------------- + +# @@PLEAC@@_5.13 +# Python doesn't allow presizing of dicts, but hashing is efficient - +# it only re-sizes at intervals, not every time an item is added. + +# @@PLEAC@@_5.14 +count = {} +for element in mylist: + count[element] = count.get(element, 0) + 1 + +# @@PLEAC@@_5.15 +#----------------------------- +import fileinput + +father = {'Cain': 'Adam', + 'Abel': 'Adam', + 'Seth': 'Adam', + 'Enoch': 'Cain', + 'Irad': 'Enoch', + 'Mehujael': 'Irad', + 'Methusael': 'Mehujael', + 'Lamech': 'Methusael', + 'Jabal': 'Lamech', + 'Tubalcain': 'Lamech', + 'Enos': 'Seth', + } + +for line in fileinput.input(): + person = line.rstrip() + while person: # as long as we have people, + print person, # print the current name + person = father.get(person) # set the person to the person's father + print + +#----------------------------- +import fileinput + +children = {} +for k, v in father.items(): + children.setdefault(v, []).append(k) + +for line in fileinput.input(): + person = line.rstrip() + kids = children.get(person, ["nobody"]) + print person, "begat", ", ".join(kids) + +#----------------------------- +import sys, re +pattern = re.compile(r'^\s*#\s*include\s*<([^>]+)') +includes = {} +for filename in filenames: + try: + infile = open(filename) + except IOError, err: + print>>sys.stderr, err + continue + 
for line in infile: + match = pattern.match(line) + if match: + includes.setdefault(match.group(1), []).append(filename) +#----------------------------- +# list of files that don't include others +mydict = {} +for e in reduce(lambda a,b: a + b, includes.values()): + if not includes.has_key(e): + mydict[e] = 1 +include_free = mydict.keys() +include_free.sort() + +# @@PLEAC@@_5.16 +#----------------------------- +#!/usr/bin/env python -w +# dutree - print sorted indented rendition of du output +import os, sys + +def get_input(args): + # NOTE: This is insecure - use only from trusted code! + cmd = "du " + " ".join(args) + infile = os.popen(cmd) + + dirsize = {} + kids = {} + for line in infile: + size, name = line[:-1].split("\t", 1) + dirsize[name] = int(size) + parent = os.path.dirname(name) + kids.setdefault(parent, []).append(name) + # Remove the last field added, which is the root + kids[parent].pop() + if not kids[parent]: + del kids[parent] + + return name, dirsize, kids + +def getdots(root, dirsize, kids): + size = cursize = dirsize[root] + if kids.has_key(root): + for kid in kids[root]: + cursize -= dirsize[kid] + getdots(kid, dirsize, kids) + if size != cursize: + dot = root + "/." 
+ dirsize[dot] = cursize + kids[root].append(dot) + +def output(root, dirsize, kids, prefix = "", width = 0): + path = os.path.basename(root) + size = dirsize[root] + fmt = "%" + str(width) + "d %s" + line = fmt % (size, path) + print prefix + line + + prefix += (" " * (width-1)) + "| " + (" " * len(path)) + + if kids.has_key(root): + kid_list = kids[root] + kid_list.sort(lambda x, y, dirsize=dirsize: + cmp(dirsize[x], dirsize[y])) + width = len(str(dirsize[kid_list[-1]])) + for kid in kid_list: + output(kid, dirsize, kids, prefix, width) + +def main(): + root, dirsize, kids = get_input(sys.argv[1:]) + getdots(root, dirsize, kids) + output(root, dirsize, kids) + +if __name__ == "__main__": + main() + + +# @@PLEAC@@_6.0 +# Note: regexes are used less often in Python than in Perl as tasks are often +# covered by string methods, or specialised objects, modules, or packages. + +import re # "re" is the regular expression module. +re.search("sheep",meadow) # returns a MatchObject is meadow contains "sheep". +if not re.search("sheep",meadow): + print "no sheep on this meadow only a fat python." +# replacing strings is not done by "re"gular expressions. +meadow = meadow.replace("old","new") # replace "old" with "new" and assign result. +#----------------------------- +re.search("ovine",meadow) + +meadow = """Fine bovines demand fine toreadors. +Muskoxen are polar ovibovine species. +Grooviness went out of fashion decades ago.""" + +meadow = "Ovines are found typically in ovaries." + +if re.search(r"\bovines\b",meadow,re.I) : print "Here be sheep!" 
+#----------------------------- +# The tricky bit +mystr = "good food" +re.sub("o*","e",mystr,1) # gives 'egood food' + +echo ababacaca | python -c "import sys,re; print re.search('(a|ba|b)+(a|ac)+',sys.stdin.read()).group()" +#----------------------------- +# pattern matching modifiers +# assume perl code iterates over some file +import re, fileinput +for ln = fileinput.input(): + fnd = re.findall("(\d+)",ln) + if len(fnd) > 0: + print "Found number %s" % (fnd[0]) +# ---------------------------- +digits = "123456789" +nonlap = re.findall("(\d\d\d)", digits) +yeslap = ["not yet"] +print "Non-overlapping:",",".join(nonlap) +print "Overlapping :",",".join(yeslap) +# ---------------------------- +mystr = "And little lambs eat ivy" +fnd = re.search("(l[^s]*s)", mystr) +print "(%s) (%s) (%s)" % (mystr[:fnd.start()], fnd.group(), mystr[fnd.end():]) +# (And ) (little lambs) ( eat ivy) + + +# @@PLEAC@@_6.1 +import re +dst = re.sub("this","that",src) +#----------------------------- +# strip to basename +basename = re.sub(".*/(?=[^/]+)","",progname) + +# Make All Words Title-Cased +# DON'T DO THIS - use str.title() instead +def cap(mo): return mo.group().capitalize() +re.sub("(?P<n>\w+)",cap,"make all words title-cased") + +# /usr/man/man3/foo.1 changes to /usr/man/cat3/foo.1 +manpage = "/usr/man/man3/foo.1" +catpage = re.sub("man(?=\d)","cat",manpage) +#----------------------------- +bindirs = "/usr/bin /bin /usr/local/bin".split() +libdirs = [d.replace("bin", "lib") for d in bindirs] + +print " ".join(libdirs) +#=> /usr/lib /lib /usr/local/lib +#----------------------------- +# strings are never modified in place. +#----------------------------- + +# @@PLEAC@@_6.2 +##--------------------------- + +# DON'T DO THIS. use line[:-1].isalpha() [this probably goes for the +# remainder of this section too!] 
+import re +if re.match("^[A-Za-z]+$",line): + print "pure alphabetic" +##--------------------------- +if re.match(r"^[^\W\d_]+$", line, re.LOCALE): + print "pure alphabetic" +##--------------------------- +import re +import locale + +try: + locale.setlocale(locale.LC_ALL, 'fr_CA.ISO8859-1') +except: + print "couldn't set locale to French Cnadian" + raise SystemExit + +DATA=""" +silly +façade +coöperate +niño +Renée +Molière +hæmoglobin +naïve +tschüß +random!stuff#here +""" + +for ln in DATA.split(): + ln = ln.rstrip() + if re.match(r"^[^\W\d_]+$",ln,re.LOCALE): + print "%s: alphabetic" % (ln) + else: + print "%s: line noise" % (ln) +# although i dont think "coöperate" should be in canadian +##--------------------------- + +# @@PLEAC@@_6.3 +# Matching Words +"\S+" # as many non-whitespace bytes as possible +"[A-Za-z'-]+" # as many letters, apostrophes, and hyphens + +# string split is similar to splitting on "\s+" +"A text with some\tseparator".split() + +"\b*([A-Za-z]+)\b*" # word boundaries +"\s*([A-Za-z]+)\s*" # might work too as on letters are allowed. + +re.search("\Bis\B","this thistle") # matches on thistle not on this +re.search("\Bis\B","vis-a-vis") # does not match + +# @@PLEAC@@_6.4 +#----------------------------- +#!/usr/bin/python +# resname - change all "foo.bar.com" style names in the input stream +# into "foo.bar.com [204.148.40.9]" (or whatever) instead + +import socket # load inet_addr +import fileinput +import re + +match = re.compile("""(?P<hostname> # capture hostname + (?: # these parens for grouping only + [\w-]+ # hostname component + \. # ant the domain dot + ) + # now repeat that whole thing a bunch of times + [A-Za-z] # next must be a letter + [\w-] + # now trailing domain part + ) # end of hostname capture + """,re.VERBOSE) # for nice formatting + +def repl(match_obj): + orig_hostname = match_obj.group("hostname") + try: + addr = socket.gethostbyname(orig_hostname) + except socket.gaierror: + addr = "???" 
+ return "%s [%s]" % (orig_hostname, addr) + +for ln in fileinput.input(): + print match.sub(repl, ln) +#----------------------------- +re.sub("""(?x) # nicer formatting + \# # a pound sign + (\w+) # the variable name + \# # another pound sign + """, + lambda m: eval(m.group(1)), # replace with the value of the global variable + line + ) +##----------------------------- +re.sub("""(?x) # nicer formatting + \# # a pound sign + (\w+) # the variable name + \# # another pound sign + """, + lambda m: eval(eval(m.group(1))), # replace with the value of *any* variable + line + ) +##----------------------------- + +# @@PLEAC@@_6.5 +import re +pond = "one fish two fish red fish blue fish" +fishes = re.findall(r"(?i)(\w+)\s+fish\b",pond) +if len(fishes)>2: + print "The third fish is a %s one." % (fishes[2]) +##----------------------------- +re.findall(r"(?i)(?:\w+\s+fish\s+){2}(\w+)\s+fish",pond) +##----------------------------- +count = 0 +for match_object in re.finditer(r"PAT", mystr): + count += 1 # or whatever you want to do here + +# "progressive" matching might be better if one wants match 5 from 50. +# to count use +count = len(re.findall(r"PAT",mystr)) +count = len(re.findall(r"aba","abaababa")) + +# "count" overlapping matches +count = len(re.findall(r"(?=aba)","abaababa")) + +# FASTEST non-overlapping might be str.count +"abaababa".count("aba") +##----------------------------- +pond = "one fish two fish red fish blue fish" +colors = re.findall(r"(?i)(\w+)\s+fish\b",pond) # get all matches +color = colors[2] # then the one we want + +# or without a temporary list +color = re.findall(r"(?i)(\w+)\s+fish\b",pond)[2] # just grab element 3 + +print "The third fish in the pond is %s." % (color) +##----------------------------- +import re + +pond = "one fish two fish red fish blue fish" +matches = re.findall(r"(\w+)\s+fish\b",pond) +evens = [fish for (i, fish) in enumerate(matches) if i%2] +print "Even numbered fish are %s." 
% (" ".join(evens)) +##----------------------------- +count = 0 +def four_is_sushi(match_obj): + global count + count += 1 + if count==4: + return "sushi%s" % (match_obj.group(2)) + return "".join(match_obj.groups()) + +re.sub(r"""(?x) # VERBOSE + \b # makes next \w more efficient + ( \w+ ) # this is what we'll be changing + ( + \s+ fish \b + )""", + four_is_sushi, + pond) +# one fish two fish red fish sushi fish +##----------------------------- +# greedily +last_fish = re.findall(r"(?i).*\b(\w+)\s+fish\b",pond) +##----------------------------- +pond = "One fish two fish red fish blue fish swim here" +color = re.findall(r"(?i)\b(\w+)\s+fish\b",pond)[-1] +print "Last fish is "+color+"." +# FASTER using string. +lastfish = pond.rfind("fish") +color = pond[:lastfish].split()[-1] +##----------------------------- +r"""(?x) + A # find some pattern A + (?! # mustn't be able to find + .* # something + A # and A + ) + $ # through the end of string + """ + +pond = "One fish two fish red fish blue fish swim here" +fnd = re.findall(r"""(?xis) # VERBOSE, CASEINSENSITIVE, DOTALL + \b ( \w+ ) \s+ fish \b + (?! .* \b fish \b )""", + pond) +if len(fnd): + print "Last fish is %s." % (fnd[0]) +else: + print "Failed!" + + +# @@PLEAC@@_6.6 +# Matching Multiple Lines +# +#!/usr/bin/python +# killtags - very bad html tag killer +import re +import sys + +text = open(sys.argv[1]).read() # read the whole file +text = re.sub("(?ms)<.*?>","",text) # strip tags (terrible +print text +## ---------------------------- +#!/usr/bin/python +# headerfy: change certain chapter headers to html +import sys, re + +match = re.compile(r"""(?xms) # re.VERBOSE, re.MULTILINE, and re.DOTALL + \A # start of the string + (?P<chapter> # capture in g<chapter> + Chapter # literal string + \s+ # mandatory whitespace + \d+ # decimal number + \s* # optional whitespace + : # a real colon + . 
* # anything not a newline till end of line + ) + """) +text = open(sys.argv[1]).read() # read the whole file +for paragraph in text.split("\n"): # split on unix end of lines + p = match.sub("<h1>\g<chapter></h1>",paragraph) + print p +## ---------------------------- +# the one liner does not run. +# python -c 'import sys,re; for p in open(sys.argv[1]).read().split("\n\n"): print re.sub(r"(?ms)\A(Chapter\s+\d+\s*:.*)","<h1>\g0</h1>",p)' +## ---------------------------- +match = re.compile(r"(?ms)^START(.*?)^END") + # s makes . span line boundaries + # m makes ^ match at the beginning of the string and at the beginning of each line + +chunk = 0 +for paragraph in open(sys.argv[1]).read().split("\n\n"): + chunk += 1 + fnd = match.findall(paragraph) + if fnd: + print "chunk %d in %s has <<%s>>" % (chunk,sys.argv[1],">>,<<".join(fnd)) +## ---------------------------- + +# @@PLEAC@@_6.7 +import sys +# Read the whole file and split +chunks = open(sys.argv[1]).read().split() # on whitespace +chunks = open(sys.argv[1]).read().split("\n") # on line ends + +# splitting on pattern +import re +pattern = r"x" +chunks = re.split(pattern, open(sys.argv[1]).read()) +##----------------------------- +chunks = re.split(r"(?m)^\.(Ch|Se|Ss)$",open(sys.argv[1]).read()) +print "I read %d chunks." % (len(chunks)) +# without delimiters +chunks = re.split(r"(?m)^\.(?:Ch|Se|Ss)$",open(sys.argv[1]).read()) + +# with delimiters +chunks = re.split(r"(?m)^(\.(?:Ch|Se|Ss))$",open(sys.argv[1]).read()) + +# with delimiters at chunkstart +chunks = re.findall(r"""(?xms) # multiline, dot matches lineend, allow comments + ((?:^\.)? # consume the separator if present + .*?) 
# match everything but not greedy + (?= # end the match on this but dont consume it + (?: # dont put into group [1] + ^\.(?:Ch|Se|Ss)$ # either end on one of the roff commands + |\Z # or end of text + ) + )""", + open(sys.argv[1]).read()) +# [1] if "?:" is removed the result holds tuples: ('.Ch\nchapter x','.Ch') +# which might be more usefull. + +# @@PLEAC@@_6.8 +##----------------------------- +# Python doesn't have perl's range operators +# If you want to only use a selected line range, use enumerate +# (though note that indexing starts at zero: +for i, line in enumerate(myfile): + if firstlinenum <= i < lastlinenum: + dosomethingwith(line) + +# Using patterned ranges is slightly trickier - +# You need to search for the first pattern then +# search for the next pattern: +import re +for line in myfile: + if re.match(pat1, line): + break + +dosomethingwith(line) # Only if pat1 can be on same line as pat2 + +for line in myfile: + if re.match(pat2, line): + break + dosomethingwith(line) +##----------------------------- +# If you need to extract ranges a lot, the following generator funcs +# may be useful: +def extract_range(myfile, start, finish): + for i, line in enumerate(myfile): + if start <= i < finish: + yield line + elif i == finish: + break + +for line in extract_range(open("/etc/passwd"), 3, 5): + print line + +def patterned_range(myfile, startpat, endpat=None): + startpat = re.compile(startpat) + if endpat is not None: + endpat = re.compile(endpat) + in_range = False + for line in myfile: + if re.match(startpat, line): + in_range = True + if in_range: + yield line + if endpat is not None and re.match(endpat, line): + break + +# DO NOT DO THIS. Use the email module instead +for line in patterned_range(msg, "^From:?", "^$"): + pass #... 
+ + +# @@PLEAC@@_6.9 +tests = (("list.?",r"^list\..$"), + ("project.*",r"^project\..*$"), + ("*old",r"^.*old$"), + ("type*.[ch]",r"^type.*\.[ch]$"), + ("*.*",r"^.*\..*$"), + ("*",r"^.*$"), + ) + +# The book says convert "*","?","[","]" all other characters will be quoted. +# The book uses "\Q" which escapes any characters that would otherwise be +# treated as regular expression. +# Escaping every char fails as "\s" is not "s" in a regex. + +def glob2pat(globstr): + pat = globstr.replace("\\",r"\\") + pat = pat.replace(".",r"\.").replace("?",r".").replace("*",r".*") + + return "^"+pat+"$" + +for globstr, patstr in tests: + g2p = glob2pat(globstr) + if g2p != patstr: + print globstr, "failed! Should be", patstr, "but was", g2p + + +# @@PLEAC@@_6.10 + +# download the following standalone program +#!/usr/bin/python +# popgrep1 - grep for abbreviations of places that say "pop" +# version 1: slow but obvious way +import fileinput +import re +popstates = ["CO","ON","MI","WI","MN"] +for line in fileinput.input(): + for state in popstates: + if re.search(r"\b"+state+r"\b",line): + print line + + + +#----------------------------- +# download the following standalone program +#!/usr/bin/python +# popgrep2 - grep for abbreviations of places that say "pop" +# version 2: compile the patterns +import fileinput +import re +popstates = ["CO","ON","MI","WI","MN"] +state_re = [] +for state in popstates: + state_re.append(re.compile(r"\b"+state+r"\b")) +for line in fileinput.input(): + for state in state_re: + if state.search(line): + print line + + +#----------------------------- +# download the following standalone program +#!/usr/bin/python +# popgrep3 - grep for abbreviations of places that say "pop" +# version 3: compile a single pattern +import fileinput +import re +popstates = ["CO","ON","MI","WI","MN"] +state_re = re.compile(r"\b(?:"+"|".join(popstates)+r")\b") +for line in fileinput.input(): + if state_re.search(line): + print line + + +#----------------------------- +# 
download the following standalone program +#!/usr/bin/python +# grepauth - print lines that mention both Tom and Nat +import fileinput +import re + +def build_match_any(words): + return re.compile("|".join(words)) +def uniq(arr): + seen = {} + for item in arr: + seen[item] = seen.get(item, 0) + 1 + return seen.keys() +def build_match_all(words): + r = re.compile("|".join(words)) + c = lambda line: len(uniq(r.findall(line)))>=len(words) + return c + +any = build_match_any(("Tom","Nat")) +all = build_match_all(("Tom","Nat")) +for line in fileinput.input(): + if any.search(line): + print "any:", line + if all(line): + print "all:", line + + + +#----------------------------- + + +# @@PLEAC@@_6.11 +# Testing for a Valid Pattern + +import re +while True: + pat = raw_input("Pattern? ") + try: + re.compile(pat) + except re.error, err: + print "INVALID PATTERN", err + continue + break + +# ---- +def is_valid_pattern(pat): + try: + re.compile(pat) + except re.error: + return False + return True + +# ---- + +# download the following standalone program +#!/usr/bin/python +# paragrep - trivial paragraph grepper +# +# differs from perl version in parano. +# python version displays paragraph in current file. + +import sys, os.path, re +if len(sys.argv)<=1: + print "usage: %s pat [files]\n" % sys.argv[0] + sys.exit(1) + +pat = sys.argv[1] +try: + pat_re = re.compile(pat) +except: + print "%s: bad pattern %s: %s" % (sys.argv[1], pat, sys.exc_info()[1]) + sys.exit(1) +for filename in filter(os.path.isfile,sys.argv[2:]): + parano = 0 + for para in open(filename).read().split("\n\n"): + parano += 1 + if pat_re.search(para): + print filename, parano, para, "\n" + + + +# ---- + +# as we dont evaluate patterns the attack :: +# +# $pat = "You lose @{[ system('rm -rf *']} big here"; +# +# does not work. 
+ + +# @@PLEAC@@_6.12 + +# download the following standalone program +#!/usr/bin/python +# localeg - demonstrates locale effects +# +# re must be told to respect locale either in the regexp +# "(?L)" or as flag to the call (python 2.4) "re.LOCALE". + +import sys +import re, string +from locale import LC_CTYPE, setlocale, getlocale + +name = "andreas k\xF6nig" +locale = {"German" : "de_DE.ISO_8859-1", "English" : "en_US"} +# us-ascii is not supported on linux py23 +# none works in activestate py24 + +try: + setlocale(LC_CTYPE, locale["English"]) +except: + print "Invalid locale %s" % locale["English"] + sys.exit(1) +english_names = [] +for n in re.findall(r"(?L)\b(\w+)\b",name): + english_names.append(n.capitalize()) + +try: + setlocale(LC_CTYPE, locale["German"]) +except: + print "Invalid locale %s" % locale["German"] + sys.exit(1) +german_names = map(string.capitalize, re.findall(r"(?L)\b(\w+)\b",name)) + +print "English names: %s" % " ".join(english_names) +print "German names: %s" % " ".join(german_names) + + +# @@PLEAC@@_6.13 +##----------------------------- +import difflib +matchlist = ["ape", "apple", "lapel", "peach", "puppy"] +print difflib.get_close_matches("appel", matchlist) +#=> ['lapel', 'apple', 'ape'] +##----------------------------- +# Also see: +# http://www.personal.psu.edu/staff/i/u/iua1/python/apse/ +# http://www.bio.cam.ac.uk/~mw263/pyagrep.html + +# @@PLEAC@@_6.14 +##----------------------------- +# To search (potentially) repeatedly for a pattern, use re.finditer(): + +# DO NOT DO THIS. 
Split on commas and convert elems using int() +mystr = "3,4,5,9,120" +for match in re.finditer("(\d+)", mystr): + n = match.group(0) + if n == "9": + break # '120' will never be matched + print "Found number", n + +# matches know their end position +mystr = "The year 1752 lost 10 days on the 3rd of September" +x = re.finditer("(\d+)", mystr) +for match in x: + n = match.group(0) + print "Found number", n + +tail = re.match("(\S+)", mystr[match.end():]) +if tail: + print "Found %s after the last number."%tail.group(0) + + +# @@PLEAC@@_6.15 +# Python's regexes are based on Perl's, so it has the non-greedy +# '*?', '+?', and '??' versions of '*', '+', and '?'. +# DO NOT DO THIS. import htmllib, formatter, etc, instead +#----------------------------- +# greedy pattern +txt = re.sub("<.*>", "", txt) # try to remove tags, very badly + +# non-greedy pattern +txt = re.sub("<.*?>", "", txt) # try to remove tags, still rather badly +#----------------------------- +txt = "<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b>" + +print re.findall("<b><i>(.*?)</i></b>", txt +##----------------------------- +print re.findall("/BEGIN((?:(?!BEGIN).)*)END/", txt) +##----------------------------- +print re.findall("<b><i>((?:(?!<b>|<i>).)*)</i></b>", txt) +##----------------------------- +print re.findall("<b><i>((?:(?!<[ib]>).)*)</i></b>", txt) +##----------------------------- +print re.findall(""" + <b><i> + [^<]* # stuff not possibly bad, and not possibly the end. + (?: # at this point, we can have '<' if not part of something bad + (?! </?[ib]> ) # what we can't have + < # okay, so match the '<' + [^<]* # and continue with more safe stuff + ) * + </i></b> + """, re.VERBOSE, txt) +##----------------------------- + +# @@PLEAC@@_6.16 +##----------------------------- +text = """ +This is a test +test of the duplicate word finder. 
+""" +words = text.split() +for curr, next in zip(words[:-1], words[1:]): + if curr.upper() == next.upper(): + print "Duplicate word '%s' found." % curr + +# DON'T DO THIS +import re +pat = r""" + \b # start at a word boundary (begin letters) + (\S+) # find chunk of non-whitespace + \b # until another word boundary (end letters) + ( + \s+ # separated by some whitespace + \1 # and that very same chunk again + \b # until another word boundary + ) + # one or more sets of those + """ +for match in re.finditer(pat, text, flags=re.VERBOSE|re.IGNORECASE): + print "Duplicate word '%s' found." % match.group(1) +##----------------------------- +a = 'nobody'; +b = 'bodysnatcher'; + +text = a+" "+b +pat = r"^(\w+)(\w+) \2(\w+)$" +for match in re.finditer(pat, text): + m1, m2, m3 = match.groups() + print m2, "overlaps in %s-%s-%s"%(m1, m2, m3) +##----------------------------- +pat = r"^(\w+?)(\w+) \2(\w+)$" +##----------------------------- +try: + while True: + factor = re.match(r"^(oo+?)\1+$", n).group(1) + n = re.sub(factor, "o", n) + print len(factor) +except AttributeError: + print len(n) +##----------------------------- +def diaphantine(n, x, y, z): + pat = r"^(o*)\1{%s}(o*)\2{%s}(o*)\3{%s}$"%(x-1, y-1, z-1) + text = "o"*n + try: + vals = [len(v) for v in re.match(pat, text).groups()] + except ValueError: + print "No solutions." 
+ else: + print "One solution is: x=%s, y=%s, z=%s."%tuple(vals) + +diaphantine(n=281, x=12, y=15, z=16) + +# @@PLEAC@@_6.17 +##----------------------------- +# Pass any of the following patterns to re.match(), etc +pat = "ALPHA|BETA" +pat = "^(?=.*ALPHA)(?=.*BETA)" +pat = "ALPHA.*BETA|BETA.*ALPHA" +pat = "^(?:(?!PAT).)*$" +pat = "(?=^(?:(?!BAD).)*$)GOOD" +##----------------------------- +if not re.match(pattern, text): + something() +##----------------------------- +if re.match(pat1, text) and re.match(pat2, text): + something() +##----------------------------- +if re.match(pat1, text) or re.match(pat2, text): + something() +##----------------------------- +# DON'T DO THIS. +"""minigrep - trivial grep""" +import sys, re + +pat = sys.argv[1] +for line in sys.stdin: + if re.match(pat, line): + print line[:-1] +##----------------------------- +if re.match(r"^(?=.*bell)(?=.*lab)", "labelled"): + something() +##----------------------------- +if re.search("bell", s) and re.search("lab", s): + something() +##----------------------------- +if re.match(""" + ^ # start of string + (?= # zero-width lookahead + .* # any amount of intervening stuff + bell # the desired bell string + ) # rewind, since we were only looking + (?= # and do the same thing + .* # any amount of intervening stuff + lab # and the lab part + ) + """, + murray_hill, + re.DOTALL | re.VERBOSE): + print "Looks like Bell Labs might be in Murray Hill!" +##----------------------------- +if re.match(r"(?:^.*bell.*lab)|(?:^.*lab.*bell)", "labelled"): + something() +##----------------------------- +brand = "labelled" +if re.match(""" + (?: # non-capturing grouper + ^ .*? # any amount of stuff at the front + bell # look for a bell + .*? # followed by any amount of anything + lab # look for a lab + ) # end grouper + | # otherwise, try the other direction + (?: # non-capturing grouper + ^ .*? # any amount of stuff at the front + lab # look for a lab + .*? 
# followed by any amount of anything + bell # followed by a bell + ) # end grouper + """, + brand, + re.DOTALL | re.VERBOSE): + print "Our brand has bell and lab separate." +##----------------------------- +x = "odlaw" +if re.match("^(?:(?!waldo).)*$", x): + print "There's no waldo here!" +##----------------------------- +if re.match(""" + ^ # start of string + (?: # non-capturing grouper + (?! # look ahead negation + waldo # is he ahead of us now? + ) # is so, the negation failed + . # any character (cuzza /s) + ) * # repeat that grouping 0 or more + $ # through the end of the string + """, + x, + re.VERBOSE | re.DOTALL): + print "There's no waldo here!\n"; +##----------------------------- + +# @@PLEAC@@_6.18 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_6.19 +##----------------------------- +from email._parseaddr import AddressList + +print AddressList("fred&barney@stonehenge.com").addresslist[0] + +print AddressList("fred&barney@stonehenge.com (Hanna Barbara)").addresslist[0] + +name, address = AddressList("Mr Fooby Blah <me@nowhere.com>").addresslist[0] +print "%s's address is '%s'"%(name, address) + +# @@PLEAC@@_6.20 +##----------------------------- +# Assuming the strings all start with different letters, or you don't +# mind there being precedence, use the startswith string method: + +def get_action(answer): + answer = answer.lower() + actions = ["send", "stop", "abort", "list", "end"] + for action in actions: + if action.startswith(answer): + return action + +print "Action is %s."%get_action("L") +#=> Action is list. 
+##----------------------------- +#DON'T DO THIS: +import re +answer = "ab" +answer = re.escape(answer.strip()) +for action in ("SEND", "STOP", "ABORT", "LIST", "EDIT"): + if re.match(answer, action, flags=re.IGNORECASE): + print "Action is %s."%action.lower() +##----------------------------- +import re, sys +def handle_cmd(cmd): + cmd = re.escape(cmd.strip()) + for name, action in {"edit": invoke_editor, + "send": deliver_message, + "list": lambda: system(pager, myfile), + "abort": sys.exit, + } + if re.match(cmd, name, flags=re.IGNORECASE): + action() + break + else: + print "Unknown command:", cmd +handle_cmd("ab") + +# @@PLEAC@@_6.21 +##----------------------------- +# urlify - wrap HTML links around URL-like constructs +import re, sys, fileinput + +def urlify_string(s): + urls = r'(http|telnet|gopher|file|wais|ftp)' + + ltrs = r'\w'; + gunk = r'/#~:.?+=&%@!\-' + punc = r'.:?\-' + any = ltrs + gunk + punc + + pat = re.compile(r""" + \b # start at word boundary + ( # begin \1 { + %(urls)s : # need resource and a colon + [%(any)s] +? # followed by one or more + # of any valid character, but + # be conservative and take only + # what you need to.... + ) # end \1 } + (?= # look-ahead non-consumptive assertion + [%(punc)s]* # either 0 or more punctuation + [^%(any)s] # followed by a non-url char + | # or else + $ # then end of the string + ) + """%locals(), re.VERBOSE | re.IGNORECASE) + return re.sub(pat, r"<A HREF=\1>\1</A>", s) + +if __name__ == "__main__": + for line in fileinput.input(): + print urlify_string(line) + + +# @@PLEAC@@_6.22 +##----------------------------- +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_6.23 +# The majority of regexes in this section are either partially +# or completely The Wrong Thing to Do. +##----------------------------- +# DON'T DO THIS. Use a Roman Numeral module, etc. 
(since +# you need one anyway to calculate values) +pat = r"^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$" +re.match(pat, "mcmlxcvii") +##----------------------------- +txt = "one two three four five" + +# If the words are cleanly delimited just split and rejoin: +word1, word2, rest = txt.split(" ", 2) +print " ".join([word2, word1, rest]) + +# Otherwise: +frompat = r"(\S+)(\s+)(\S+)" +topat = r"\3\2\1" +print re.sub(frompat, topat, txt) + +##----------------------------- +print str.split("=") + +# DON'T DO THIS +pat = r"(\w+)\s*=\s*(.*)\s*$" +print re.match(pat, "key=val").groups() +##----------------------------- +line = "such a very very very very very very very very very very very very very long line" +if len(line) > 80: + process(line) + +# DON'T DO THIS +pat = ".{80,}" +if re.match(pat, line): + process(line) +##----------------------------- +dt = time.strptime("12/11/05 12:34:56", "%d/%m/%y %H:%M:%S") + +# DON'T DO THIS +pat = r"(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)" +dt = re.match(pat, "12/11/05 12:34:56").groups() +##----------------------------- +txt = "/usr/bin/python" +print txt.replace("/usr/bin", "/usr/local/bin") +# Alternatively for file operations use os.path, shutil, etc. + +# DON'T DO THIS +print re.sub("/usr/bin", "/usr/local/bin", txt) +##----------------------------- +import re + +def unescape_hex(matchobj): + return chr(int(matchobj.groups(0)[0], 16)) +txt = re.sub(r"%([0-9A-Fa-f][0-9A-Fa-f])", unescape_hex, txt) + +# Assuming that the hex escaping is well-behaved, an alternative is: +def unescape_hex(seg): + return chr(int(seg[:2], 16)) + seg[2:] + +segs = txt.split("%") +txt = segs[0] + "".join(unescape_hex(seg) for seg in segs[1:]) +##----------------------------- +txt = re.sub(r""" + /\* # Match the opening delimiter + .*? 
# Match a minimal number of characters + \*/ # Match the closing delimiter + """, "", txt, re.VERBOSE) +##----------------------------- +txt.strip() + +# DON'T DO THIS +txt = re.sub(r"^\s+", "", txt) +txt = re.sub(r"\s+$", "", txt) +##----------------------------- +txt.replace("\\n", "\n") + +# DON'T DO THIS +txt = re.sub("\\n", "\n", txt) +##----------------------------- +txt = re.sub("^.*::", "") +##----------------------------- +import socket +socket.inet_aton(txt) # Will raise an error if incorrect + +# DON'T DO THIS. +octseg =r"([01]?\d\d|2[0-4]\d|25[0-5])" +dot = r"\." +pat = "^" + octseg + dot + octseg + dot + octseg + dot + octseg + "$" + +if not re.match(pat, txt, re.VERBOSE) + raise ValueError + +# Defitely DON'T DO THIS. +pat = r"""^([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])\. + ([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])$""" +##----------------------------- +fname = os.path.basename(path) + +# DON'T DO THIS. +fname = re.sub("^.*/", "", path) +##----------------------------- +import os +try: + tc = os.environ["TERMCAP"] +except KeyError: + cols = 80 +else: + cols = re.match(":co#(\d+):").groups(1) +##----------------------------- +# (not quite equivalent to the Perl version) +name = os.path.basename(sys.argv[0]) + +# DON'T DO THIS. +name = re.sub("^.*/", "", sys.argv[0]) +##----------------------------- +if sys.platform != "linux": + raise SystemExit("This isn't Linux") +##----------------------------- +txt = re.sub(r"\n\s+", " ", txt) + +# In many cases you could just use: +txt = txt.replace("\n", " ") +##----------------------------- +nums = re.findall(r"\d+\.?\d*|\.\d+", txt) +##----------------------------- +# If the words are clearly delimited just use: +capwords = [word for word in txt.split() if word.isupper()] + +# Otherwise +capwords = [word for word in re.findall(r"\b(\S+)\b", txt) if word.isupper()] + +# (probably) DON'T DO THIS. 
+capwords = re.findall(r"(\b[^\Wa-z0-9_]+\b)", txt) +##----------------------------- +# If the words are clearly delimited just use: +lowords = [word for word in txt.split() if word.islower()] + +# Otherwise +lowords = [word for word in re.findall(r"\b(\S+)\b", txt) if word.islower()] + +# (probably) DON'T DO THIS. +lowords = re.findall(r"(\b[^\WA-Z0-9_]+\b)", txt) +##----------------------------- +# If the words are clearly delimited just use: +icwords = [word for word in txt.split() if word.istitle()] + +# Otherwise +icwords = [word for word in re.finditer(r"\b(\S+)\b") if word.istitle()] + +# DON'T DO THIS. +icwords = re.findall(r"(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)", txt) +##----------------------------- +# DON'T DO THIS - use HTMLParser, etc. +links = re.findall(r"""<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>""", txt) +##----------------------------- +names = txt.split() +if len(names) == 3: + initial = names[1][0] +else: + initial = "" + +# DON'T DO THIS. +pat = "^\S+\s+(\S)\S*\s+\S" +try: + initial = re.match(pat, txt).group(1) +except AttributeError: + initial = "" +##----------------------------- +txt = re.sub('"([^"]*)"', "``\1''", txt) +##----------------------------- +sentences = [elem[0] for elem in re.findall(r"(.*?[!?.])( |\Z)", s)] +##----------------------------- +import time +dt = time.strptime(txt, "%Y-%m-%d") + +# DON'T DO THIS. +year, month, day = re.match(r"(\d{4})-(\d\d)-(\d\d)", txt).groups() +##----------------------------- +pat = r""" + ^ + (?: + 1 \s (?: \d\d\d \s)? # 1, or 1 and area code + | # ... or ... + \(\d\d\d\) \s # area code with parens + | # ... or ... + (?: \+\d\d?\d? \s)? 
# optional +country code + \d\d\d ([\s\-]) # and area code + ) + \d\d\d (\s|\1) # prefix (and area code separator) + \d\d\d\d # exchange + $ + """ +re.match(pat, txt, re.VERBOSE) +##----------------------------- +re.match(r"\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b", txt, re.IGNORECASE) +##----------------------------- +for line in file(fname, "Ur"): #Universal newlines + process(line) + +# DON'T DO THIS +lines = [re.sub(r"^([^\012\015]*)(\012\015?|\015\012?)", "", line) + for line in file(fname)] +##----------------------------- + + +# @@PLEAC@@_7.0 +for line in open("/usr/local/widgets/data"): + if blue in line: + print line[:-1] +#--------- +import sys, re +pattern = re.compile(r"\d") +for line in sys.stdin: + if not pattern.search(line): + sys.stderr.write("No digit found.\n") + sys.stdout.write("Read: " + line) +sys.stdout.close() +#--------- +logfile = open("/tmp/log", "w") +#--------- +logfile.close() +#--------- +print>>logfile, "Countdown initiated ..." +print "You have 30 seconds to reach minimum safety distance." + +# DONT DO THIS +import sys +old_output, sys.stdout = sys.stdout, logfile +print "Countdown initiated ..." +sys.stdout = old_output +print "You have 30 seconds to reach minimum safety distance." +#--------- + +# @@PLEAC@@_7.1 +# Python's open() function somewhat covers both perl's open() and +# sysopen() as it has optional arguments for mode and buffering. +source = open(path) +sink = open(path, "w") +#--------- +# NOTE: almost no one uses the low-level os.open and os.fdopen +# commands, so their inclusion here is just silly. If +# os.fdopen(os.open(...)) were needed often, it would be turned +# into its own function. 
Instead, I'll use 'fd' to hint that +# os.open returns a file descriptor +import os +source_fd = os.open(path, os.O_RDONLY) +source = os.fdopen(fd) +sink_fd = os.open(path, os.O_WRONLY) +sink = os.fdopen(sink_fd) +#--------- +myfile = open(filename, "w") +fd = os.open(filename, os.O_WRONLY | os.O_CREAT) +myfile = open(filename, "r+") +#--------- +fd = os.open(name, flags) +fd = os.open(name, flags, mode) +#--------- +myfile = open(path) +fd = os.open(path, os.O_RDONLY) +#----------------------------- +myfile = open(path, "w") +fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT) +fd = os.open(path, os.O_WRONLY|os.O_TRUNC|os.O_CREAT, 0600) +#----------------------------- +fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT) +fd = os.open(path, os.O_WRONLY|os.O_EXCL|os.O_CREAT, 0600) +#----------------------------- +myfile = open(path, "a") +fd = os.open(path, os.O_WRONLY|os.O_APPEND|os.O_CREAT) +fd = os.open(path, os.O_WRONLY|os.O_APPEND|s.O_CREAT, 0600) +#----------------------------- +fd = os.open(path, os.O_WRONLY|os.O_APPEND) +#----------------------------- +myfile = open(path, "rw") +fd = os.open(path, os.O_RDWR) +#----------------------------- +fd = os.open(path, os.O_RDWR|os.O_CREAT) +fd = os.open(path, os.O_RDWR|os.O_CREAT, 0600) +#----------------------------- +fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT) +fd = os.open(path, os.O_RDWR|os.O_EXCL|os.O_CREAT, 0600) +#----------------------------- + +# @@PLEAC@@_7.2 +# Nothing different needs to be done with Python + +# @@PLEAC@@_7.3 +import os +filename = os.path.expanduser(filename) + +# @@PLEAC@@_7.4 +myfile = open(filename) # raise an exception on error + +try: + myfile = open(filename) +except IOError, err: + raise AssertionError("Couldn't open %s for reading : %s" % + (filename, err.strerror)) + +# @@PLEAC@@_7.5 +import tempfile + +myfile = tempfile.TemporaryFile() + +#----------------------------- +# NOTE: The TemporaryFile() call is much more appropriate +# I would not suggest using this code for 
real work. +import os, tempfile + +while True: + name = os.tmpnam() + try: + fd = os.open(name, os.O_RDWR|os.O_CREAT|os.O_EXCL) + break + except os.error: + pass +myfile = tempfile.TemporaryFileWrapper(os.fdopen(fd), name) + +# now go on to use the file ... +#----------------------------- +import os +while True: + tmpname = os.tmpnam() + fd = os.open(tmpnam, os.O_RDWR | os.O_CREAT | os.O_EXCL) + if fd: + tmpfile = os.fdopen(fd) + break + +os.remove(tmpnam) + +#----------------------------- +import tempfile + +myfile = tempfile.TemporaryFile(bufsize = 0) +for i in range(10): + print>>myfile, i +myfile.seek(0) +print "Tmp file has:", myfile.read() +#----------------------------- + +# @@PLEAC@@_7.6 +DATA = """\ +your data goes here +""" +for line in DATA.split("\n"): + pass # process the line + +# @@PLEAC@@_7.7 + +for line in sys.stdin: + pass # do something with the line + +# processing a list of files from commandline +import fileinput +for line in fileinput.input(): + do something with the line + +#----------------------------- +import sys + +def do_with(myfile): + for line in myfile: + print line[:-1] + +filenames = sys.argv[1:] +if filenames: + for filename in filenames: + try: + do_with(open(filename)) + except IOError, err: + sys.stderr.write("Can't open %s: %s\n" % (filename, err.strerror)) + continue +else: + do_with(sys.stdin) + +#----------------------------- +import sys, glob +ARGV = sys.argv[1:] or glob.glob("*.[Cch]") +#----------------------------- +# NOTE: the getopt module is the prefered mechanism for reading +# command line arguments +import sys +args = sys.argv[1:] +chop_first = 0 + +if args and args[0] == "-c": + chop_first += 1 + args = args[1:] + +# arg demo 2: Process optional -NUMBER flag + +# NOTE: You just wouldn't process things this way for Python, +# but I'm trying to preserve the same semantics. 

import sys, re
# Matches a leading "-NUMBER" flag and captures the digits.
digit_pattern = re.compile(r"-(\d+)$")

args = sys.argv[1:]
if args:
    match = digit_pattern.match(args[0])
    if match:
        columns = int(match.group(1))
        args = args[1:]

# NOTE: here's the more idiomatic way, which also checks
# for the "--" or a non "-" argument to stop processing

args = sys.argv[1:]
for arg in args:
    # Stop at the explicit end-of-options marker or the first non-option.
    if arg == "--" or not arg.startswith("-"):
        break
    if arg[1:].isdigit():
        columns = int(arg[1:])



# arg demo 3: Process clustering -a, -i, -n, or -u flags
import sys, getopt
try:
    args, filenames = getopt.getopt(sys.argv[1:], "ainu")
except getopt.error:
    raise SystemExit("usage: %s [-ainu] [filenames] ..." % sys.argv[0])

append = ignore_ints = nostdout = unbuffer = 0
# getopt returns (flag, value) pairs; none of these flags takes a value.
for k, _unused in args:
    if k == "-a":
        append += 1
    elif k == "-i":
        ignore_ints += 1
    elif k == "-n":
        nostdout += 1
    elif k == "-u":
        unbuffer += 1
    else:
        raise AssertionError("Unexpected argument: %s" % k)

#-----------------------------
# Note: Idiomatic Perl gets translated to idiomatic Python
import fileinput
for line in fileinput.input():
    sys.stdout.write("%s:%s:%s" %
                     (fileinput.filename(), fileinput.filelineno(), line))
#-----------------------------
#!/usr/bin/env python
# findlogin1 - print all lines containing the string "login"
for line in fileinput.input(): # loop over files on command line
    if "login" in line:
        sys.stdout.write(line)

#-----------------------------
#!/usr/bin/env python
# lowercase - turn all lines into lowercase
### NOTE: I don't know how to do locales in Python
for line in fileinput.input(): # loop over files on command line
    sys.stdout.write(line.lower())

#-----------------------------
#!/usr/bin/env python
# NOTE: The Perl code appears buggy, in that "Q__END__W" is considered
# to be a __END__ and words after the __END__ on the same line
# are included in the count!!!
# countchunks - count how many words are used.
+# skip comments, and bail on file if __END__ +# or __DATA__ seen. +chunks = 0 +for line in fileinput.input(): + for word in line.split(): + if word.startswith("#"): + continue + if word in ("__DATA__", "__END__"): + fileinput.close() + break + chunks += 1 +print "Found", chunks, "chunks" + + +# @@PLEAC@@_7.8 +import shutil + +old = open("old") +new = open("new","w") + +for line in old: + new.writeline(line) +new.close() +old.close() + +shutil.copyfile("old", "old.orig") +shutil.copyfile("new", "old") + +# insert lines at line 20: +for i, line in enumerate(old): + if i == 20: + print>>new, "Extra line 1\n" + print>>new, "Extra line 2\n" + print>>new, line + + +# or delete lines 20 through 30: +for i, line in enumerate(old): + if 20 <= i <= 30: + continue + print>>new, line + + +# @@PLEAC@@_7.9 +# modifying with "-i" commandline switch is a perl feature +# python has fileinput +import fileinput, sys, time +today = time.strftime("%Y-%m-%d",time.localtime()) +for line in fileinput.input(inplace=1, backup=".orig"): + sys.stdout.write(line.replace("DATE",today)) + +# set up to iterate over the *.c files in the current directory, +# editing in place and saving the old file with a .orig extension. 
+import glob, re +match = re.compile("(?<=[pP])earl") +files = fileinput.FileInput(glob.glob("*.c"), inplace=1, backup=".orig") +while True: + line = files.readline() + sys.stderr.write(line) + if not line: + break + if files.isfirstline(): + sys.stdout.write("This line should appear at the top of each file\n") + sys.stdout.write(match.sub("erl",line)) + + +# @@PLEAC@@_7.10 +#----------------------------- +myfile = open(filename, "r+") +data = myfile.read() +# change data here +myfile.seek(0, 0) +myfile.write(data) +myfile.truncate(myfile.tell()) +myfile.close() +#----------------------------- +myfile = open(filename, "r+") +data = [process(line) for line in myfile] +myfile.seek(0, 0) +myfile.writelines(data) +myfile.truncate(myfile.tell()) +myfile.close() +#----------------------------- + +# @@PLEAC@@_7.11 + +import fcntl +myfile = open(somepath, 'r+') +fcntl.flock(myfile, fcntl.LOCK_EX) +# update file, then... +myfile.close() +#----------------------------- +fcntl.LOCK_SH +fcntl.LOCK_EX +fcntl.LOCK_NB +fcntl.LOCK_UN +#----------------------------- +import warnings +try: + fcntl.flock(myfile, fcntl.LOCK_EX|fcntl.LOCK_NB) +except IOError: + warnings.warn("can't immediately write-lock the file ($!), blocking ...") + fcntl.flock(myfile, fcntl.LOCK_EX) +#----------------------------- +fcntl.flock(myfile, fcntl.LOCK_UN) +#----------------------------- +# option "r+" instead "w+" stops python from truncating the file on opening +# when another process might well hold an advisory exclusive lock on it. +myfile = open(somepath, "r+") +fcntl.flock(myfile, fcntl.LOCK_EX) +myfile.seek(0, 0) +myfile.truncate(0) +print>>myfile, "\n" # or myfile.write("\n") +myfile.close() +#----------------------------- + +# @@PLEAC@@_7.12 +# Python doesn't have command buffering. 
Files can have buffering set, +# when opened: +myfile = open(filename, "r", buffering=0) #Unbuffered +myfile = open(filename, "r", buffering=1) #Line buffered +myfile = open(filename, "r", buffering=100) #Use buffer of (approx) 100 bytes +myfile = open(filename, "r", buffering=-1) #Use system default + +myfile.flush() # Flush the I/O buffer + +# stdout is treated as a file. If you ever need to flush it, do so: +import sys +sys.stdout.flush() + +# DON'T DO THIS. Use urllib, etc. +import socket +mysock = socket.socket() +mysock.connect(('www.perl.com', 80)) +# mysock.setblocking(True) +mysock.send("GET /index.html http/1.1\n\n") +f = mysock.makefile() +print "Doc is:" +for line in f: + print line[:-1] + +# @@PLEAC@@_7.13 +import select +while True: + rlist, wlist, xlist = select.select([file1, file2, file3], [], [], 0) + for r in rlist: + pass # Do something with the file handle + +# @@PLEAC@@_7.14 +# @@SKIP@@ Use select.poll() on Unix systems. +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_7.15 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_7.16 +# NOTE: this is all much easier in Python +def subroutine(myfile): + print>>myfile, "Hello, file" + +variable = myfile +subroutine(variable) + +# @@PLEAC@@_7.17 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_7.18 +for myfile in files: + print>>myfile, stuff_to_print + +# NOTE: This is unix specific +import os +file = os.popen("tee file1 file2 file3 >/dev/null", "w") +print>>myfile, "whatever" + +# NOTE: the "make STDOUT go to three files" is bad programming style +import os, sys +sys.stdout.file = os.popen("tee file1 file2 file3", "w") +print "whatever" +sys.stdout.close() + +# You could use a utility object to redirect writes: +class FileDispatcher(object): + def __init__(self, *files): + self.files = files + + def write(self, msg): + for f in self.files: + f.write(msg) + + def close(self): + for f in self.files: + f.close() + +x = open("C:/test1.txt", "w") +y = open("C:/test2.txt", "w") +z = 
open("C:/test3.txt", "w") + +fd = FileDispatcher(x, y, z) +print>>fd, "Foo" # equiv to fd.write("Foo"); fd.write("\n") +print>>fd, "Testing" +fd.close() + +# @@PLEAC@@_7.19 +import os +myfile = os.fdopen(fdnum) # open the descriptor itself +myfile = os.fdopen(os.dup(fdnum)) # open to a copy of the descriptor + +### +outcopy = os.fdopen(os.dup(sys.stdin.fileno()), "w") +incopy = os.fdopen(os.dup(sys.stdin.fileno()), "r") + +# @@PLEAC@@_7.20 +original = open("C:/test.txt") +alias = original +alias.close() +print original.closed +#=>True + +import copy + +original = open("C:/test.txt") +dupe = copy.copy(original) +dupe.close() +print original.closed +#=>False + +# DON'T DO THIS. +import sys +oldstderr = sys.stderr +oldstdout = sys.stdout + +sys.stderr = open("C:/stderrfile.txt") +sys.stdout = open("C:/stdoutfile.txt") + +print "Blah" # Will be written to C:/stdoutfile.txt +sys.stdout.close() + +sys.stdout = oldstdout +sys.stderr = oldstderr + + +# @@PLEAC@@_7.21 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_7.22 +# On Windows: +import msvcrt +myfile.seek(5, 0) +msvcrt.locking(myfile.fileno(), msvcrt.LK_NBLCK, 3) + +# On Unix: +import fcntl +fcntl.lockf(myfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB, 3, 5) + + +# ^^PLEAC^^_8.0 +#----------------------------- +for line in DATAFILE: + line = line.rstrip() + size = len(line) + print size # output size of line + +#----------------------------- +for line in datafile: + print length(line.rstrip()) # output size of line +#----------------------------- +lines = datafile.readlines() +#----------------------------- +whole_file = myfile.read() +#----------------------------- +## No direct equivalent in Python +#% perl -040 -e '$word = <>; print "First word is $word\n";' +#----------------------------- +## No direct equivalent in Python +#% perl -ne 'BEGIN { $/="%%\n" } chomp; print if /Unix/i' fortune.dat +#----------------------------- +print>>myfile, "One", "two", "three" # "One two three" +print "Baa baa black sheep." 
# Sent to default output file +#----------------------------- +buffer = myfile.read(4096) +rv = len(buffer) +#----------------------------- +myfile.truncate(length) +open("/tmp/%d.pid" % os.getpid(), "a").truncate(length) +#----------------------------- +pos = myfile.tell() +print "I'm", pos, "bytes from the start of DATAFILE." +#----------------------------- +logfile.seek(0, 2) # Seek to the end +datafile.seek(pos) # Seek to a given byte +outfile.seek(-20, 1) # Seek back 20 bytes +#----------------------------- +written = os.write(datafile.fileno(), mystr) +if written != len(mystr): + warnings.warn("only read %s bytes, not %s" % (written, len(mystr))) +#----------------------------- +pos = os.lseek(myfile.fileno(), 0, 1) # don't change position +#----------------------------- + + +# ^^PLEAC^^_8.1 +def ContReader(infile): + lines = [] + for line in infile: + line = line.rstrip() + if line.endswith("\\"): + lines.append(line[:-1]) + continue + lines.append(line) + yield "".join(lines) + lines = [] + if lines: + yield "".join(lines) + +for line in ContReader(datafile): + pass # process full record in 'line' here + +# ^^PLEAC^^_8.2 +import os +count = int(os.popen("wc -l < " + filename).read()) +#----------------------------- +for count, line in enumerate(open(filename)): + pass +count += 1 # indexing is zero based +#----------------------------- +myfile = open(filename) +count = 0 +for line in myfile: + count += 1 +# 'count' now holds the number of lines read +#----------------------------- +myfile = open(filename) +count = 0 +while True: + line = myfile.readline() + if not line: + break + count += 1 +#----------------------------- +count = 0 +while True: + s = myfile.read(2**16) + count += s.count("\n") +#----------------------------- +for line, count in zip(open(filename), xrange(1, sys.maxint)): + pass +# 'count' now holds the number of lines read +#----------------------------- +import fileinput +fi = fileinput.FileInput(filename) +while fi.readline(): pass + 
+count = fi.lineno() +#----------------------------- +def SepReader(infile, sep = "\n\n"): + text = infile.read(10000) + if not text: + return + while True: + fields = text.split(sep) + for field in fields[:-1]: + yield field + text = fields[-1] + new_text = infile.read(10000) + if not new_text: + yield text + break + text += new_text + +para_count = 0 +for para in SepReader(open(filename)): + para_count += 1 +# FIXME: For my test case (Python-pre2.2 README from CVS) this +# returns 175 paragraphs while Perl returns 174. +#----------------------------- + + +# ^^PLEAC^^_8.3 +for line in sys.stdin: + for word in line.split(): + pass # do something with 'chunk' +#----------------------------- +pat = re.compile(r"(\w[\w'-]*)") +for line in sys.stdin: + pos = 0 + while True: + match = pat.search(line, pos) + if not match: + break + pos = match.end(1) + # do something with match.group(1) + +# EXPERIMENTAL in the sre implementation but +# likely to be included in future (post-2.2) releases. +pat = re.compile(r"(\w[\w'-]*)") +for line in sys.stdin: + scanner = pat.scanner(line) + while True: + match = scanner.search() + if not match: + break + # do something with match.group(1) + + +#----------------------------- +# Make a word frequency count +import fileinput, re +pat = re.compile(r"(\w[\w'-]*)") +seen = {} +for line in fileinput.input(): + pos = 0 + while True: + match = pat.search(line, pos) + if not match: + break + pos = match.end(1) + text = match.group(1).lower() + seen[text] = seen.get(text, 0) + 1 + +# output dict in a descending numeric sort of its values +for text, count in sorted(seen.items, key=lambda item: item[1]): + print "%5d %s" % (count, text) + +#----------------------------- +# Line frequency count +import fileinput, sys +seen = {} +for line in fileinput.input(): + text = line.lower() + seen[text] = seen.get(text, 0) + 1 + +for text, count in sorted(seen.items, key=lambda item: item[1]): + sys.stdout.write("%5d %s" % (count, text)) + 
+#----------------------------- + + +# ^^PLEAC^^_8.4 +lines = myfile.readlines() +while lines: + line = lines.pop() + # do something with 'line' + +#----------------------------- +for line in reversed(myfile): + pass # do something with line +#----------------------------- +for i in range(len(lines)): + line = lines[-i] +#----------------------------- +for paragraph in sorted(SepReader(infile)): + pass # do something +#----------------------------- + + + +# ^^PLEAC^^_8.5 +import time +while True: + for line in infile: + pass # do something with the line + time.sleep(SOMETIME) + infile.seek(0, 1) +#----------------------------- +import time +naptime = 1 + +logfile = open("/tmp/logfile") +while True: + for line in logfile: + print line.rstrip() + time.sleep(naptime) + infile.seek(0, 1) +#----------------------------- +while True: + curpos = logfile.tell() + while True: + line = logfile.readline() + if not line: + break + curpos = logfile.tell() + sleep(naptime) + logfile.seek(curpos, 0) # seek to where we had been +#----------------------------- +import os +if os.stat(LOGFILENAME).st_nlink == 0: + raise SystemExit +#----------------------------- + + +# ^^PLEAC^^_8.6 +import random, fileinput +text = None +for line in fileinput.input(): + if random.randrange(fileinput.lineno()) == 0: + text = line +# 'text' is the random line +#----------------------------- +# XXX is the perl code correct? Where is the fortunes file opened? 
+import sys +adage = None +for i, rec in enumerate(SepReader(open("/usr/share/games/fortunes"), "%\n")): + if random.randrange(i+1) == 0: + adage = rec +print adage +#----------------------------- + + +# ^^PLEAC^^_8.7 +import random +lines = data.readlines() +random.shuffle(lines) +for line in lines: + print line.rstrip() +#----------------------------- + + + +# ^^PLEAC^^_8.8 +# using efficient caching system +import linecache +linecache.getline(filename, DESIRED_LINE_NUMBER) + +# or doing it more oldskool +lineno = 0 +while True: + line = infile.readline() + if not line or lineno == DESIRED_LINE_NUMBER: + break + lineno += 1 +#----------------------------- +lines = infile.readlines() +line = lines[DESIRED_LINE_NUMBER] +#----------------------------- +for i in range(DESIRED_LINE_NUMBER): + line = infile.readline() + if not line: + break +#----------------------------- + +## Not sure what this thing is doing. Allow fast access to a given +## line number? + +# usage: build_index(*DATA_HANDLE, *INDEX_HANDLE) + +# ^^PLEAC^^_8.9 +# given $RECORD with field separated by PATTERN, +# extract @FIELDS. 
fields = re.split(pattern_string, text)
#-----------------------------
pat = re.compile(pattern_string)
fields = pat.split(text)
#-----------------------------
re.split(r"([+-])", "3+5-2")
#-----------------------------
# re.split() returns strings; the captured separators are kept in the list
['3', '+', '5', '-', '2']
#-----------------------------
fields = record.split(":")
#-----------------------------
fields = re.split(r":", record)
#-----------------------------
fields = re.split(r"\s+", record)
#-----------------------------
fields = record.split(" ")
#-----------------------------


# ^^PLEAC^^_8.10
# Remove the last line: remember where the final line starts, then
# truncate the file at that offset.
myfile = open(filename, "r")
prev_pos = pos = 0
while True:
    line = myfile.readline()
    if not line:
        break
    prev_pos = pos
    pos = myfile.tell()
myfile.close()  # release the read handle before rebinding the name
myfile = open(filename, "a")
myfile.truncate(prev_pos)
#-----------------------------



# ^^PLEAC^^_8.11
open(filename, "rb")
open(filename, "wb")
#-----------------------------
gifname = "picture.gif"
gif_file = open(gifname, "rb")

# Don't think there's an equivalent for these in Python
#binmode(GIF); # now DOS won't mangle binary input from GIF
#binmode(STDOUT); # now DOS won't mangle binary output to STDOUT

#-----------------------------
while True:
    buff = gif_file.read(8 * 2**10)  # 8 KiB at a time from the handle opened above
    if not buff:
        break
    sys.stdout.write(buff)
#-----------------------------



# ^^PLEAC^^_8.12
address = recsize * recno
myfile.seek(address, 0)
buffer = myfile.read(recsize)
#-----------------------------
address = recsize * (recno-1)
#-----------------------------



# ^^PLEAC^^_8.13
import posixfile
address = recsize * recno
myfile.seek(address)
buffer = myfile.read(recsize)
# ... work with the buffer, then turn it back into a string and ...
+myfile.seek(-recsize, posixfile.SEEK_CUR) +myfile.write(buffer) +myfile.close() +#----------------------------- +## Not yet implemented +# weekearly -- set someone's login date back a week +# @@INCOMPLETE@@ + + +# ^^PLEAC^^_8.14 +## Note: this isn't optimal -- the 's+=c' may go O(N**2) so don't +## use for large strings. +myfile.seek(addr) +s = "" +while True: + c = myfile.read(1) + if not c or c == "\0": + break + s += c +#----------------------------- +myfile.seek(addr) +offset = 0 +while True: + s = myfile.read(1000) + x = s.find("\0") + if x != -1: + offset += x + break + offset += len(s) + if len(s) != 1000: # EOF + break +myfile.seek(addr) +s = myfile.read(offset - 1) +myfile.read(1) + +#----------------------------- +## Not Implemented +# bgets - get a string from an address in a binary file +#----------------------------- +#!/usr/bin/perl +# strings - pull strings out of a binary file +import re, sys + +## Assumes SepReader from above + +pat = re.compile(r"([\040-\176\s]{4,})") +for block in SepReader(sys.stdin, "\0"): + pos = 0 + while True: + match = pat.search(block, pos) + if not match: + break + print match.group(1) + pos = match.end(1) +#----------------------------- + + +# @@PLEAC@@_8.15 + +# RECORDSIZE is the length of a record, in bytes. +# TEMPLATE is the unpack template for the record +# FILE is the file to read from +# FIELDS is a tuple, one element per field +import struct +RECORDSIZE= struct.calcsize(TEMPLATE) +while True: + record = FILE.read(RECORDSIZE): + if len(record)!=RECORDSIZE: + raise "short read" + FIELDS = struct.unpack(TEMPLATE, record) +# ---- + + +# ^^PLEAC^^_8.16 +# NOTE: to parse INI file, see the stanard ConfigParser module. +import re +pat = re.compile(r"\s*=\s*") +for line in config_file: + if "#" in line: # no comments + line = line[:line.index("#")] + line = line.strip() # no leading or trailing white + if not line: # anything left? 
+ continue + m = pat.search(line) + var = line[:m.start()] + value = line[m.end():] + User_Preferences[var] = value + + +# ^^PLEAC^^_8.17 +import os + +mode, ino, dev, nlink, uid, gid, size, \ +atime, mtime, ctime = os.stat(filename) + +mode &= 07777 # discard file type info + +#----------------------------- +info = os.stat(filename) +if info.st_uid == 0: + print "Superuser owns", filename +if info.st_atime > info.st_mtime: + print filename, "has been read since it was written." +#----------------------------- +import os +def is_safe(path): + info = os.stat(path) + + # owner neither superuser nor me + # the real uid is in stored in the $< variable + if info.st_uid not in (0, os.getuid()): + return False + + # check whether group or other can write file. + # use 066 to detect either reading or writing + if info.st_mode & 022: # someone else can write this + if not os.path.isdir(path): # non-directories aren't safe + return False + # but directories with the sticky bit (01000) are + if not (info.st_mode & 01000): + return False + return True +#----------------------------- +## XXX What is '_PC_CHOWN_RESTRICTED'? + +def is_verysafe(path): + terms = [] + while True: + path, ending = os.path.split(path) + if not ending: + break + terms.insert(0, ending) + for term in terms: + path = os.path.join(path, term) + if not is_safe(path): + return False + return True +#----------------------------- + +# Program: tctee +# Not Implemented (requires reimplementing Perl's builtin '>>', '|', +# etc. semantics) + +# @@PLEAC@@_8.18 +#!/usr/bin/python +# tailwtmp - watch for logins and logouts; +# uses linux utmp structure, from /usr/include/bits/utmp.h + +# /* The structure describing an entry in the user accounting database. */ +# struct utmp +# { +# short int ut_type; /* Type of login. */ +# pid_t ut_pid; /* Process ID of login process. */ +# char ut_line[UT_LINESIZE]; /* Devicename. */ +# char ut_id[4]; /* Inittab ID. */ +# char ut_user[UT_NAMESIZE]; /* Username. 
*/ +# char ut_host[UT_HOSTSIZE]; /* Hostname for remote login. */ +# struct exit_status ut_exit; /* Exit status of a process marked +# as DEAD_PROCESS. */ +# long int ut_session; /* Session ID, used for windowing. */ +# struct timeval ut_tv; /* Time entry was made. */ +# int32_t ut_addr_v6[4]; /* Internet address of remote host. */ +# char __unused[20]; /* Reserved for future use. */ +# }; + +# /* Values for the `ut_type' field of a `struct utmp'. */ +# #define EMPTY 0 /* No valid user accounting information. */ +# +# #define RUN_LVL 1 /* The system's runlevel. */ +# #define BOOT_TIME 2 /* Time of system boot. */ +# #define NEW_TIME 3 /* Time after system clock changed. */ +# #define OLD_TIME 4 /* Time when system clock changed. */ +# +# #define INIT_PROCESS 5 /* Process spawned by the init process. */ +# #define LOGIN_PROCESS 6 /* Session leader of a logged in user. */ +# #define USER_PROCESS 7 /* Normal process. */ +# #define DEAD_PROCESS 8 /* Terminated process. */ +# +# #define ACCOUNTING 9 + +import time +import struct +import os + +class WTmpRecord: + fmt = "hI32s4s32s256siili4l20s"; + _fieldnames = ["type","PID","Line","inittab","User","Hostname", + "exit_status", "session", "time", "addr" ] + def __init__(self): + self._rec_size = struct.calcsize(self.fmt) + def size(self): + return self._rec_size + def unpack(self, bin_data): + rec = struct.unpack(self.fmt, bin_data) + self._rec = [] + for i in range(len(rec)): + if i in (2,3,4,5): + # remove character zeros from strings + self._rec.append( rec[i].split("\0")[0] ) + else: + self._rec.append(rec[i]) + return self._rec + def fieldnames(self): + return self._fieldnames + def __getattr__(self,name): + return self._rec[self._fieldnames.index(name)] + +rec = WTmpRecord() +f = open("/var/log/wtmp","rb") +f.seek(0,2) +while True: + while True: + bin = f.read(rec.size()) + if len(bin) != rec.size(): + break + rec.unpack(bin) + if rec.type != 0: + print " %1d %-8s %-12s %-24s %-20s %5d %08x" % \ + (rec.type, 
rec.User, rec.Line, + time.strftime("%a %Y-%m-%d %H:%M:%S",time.localtime(rec.time)), + rec.Hostname, rec.PID, rec.addr) + time.sleep(1) +f.close() + +# @@PLEAC@@_8.19 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_8.20 +#!/usr/bin/python +# laston - find out when given user last logged on +import sys +import struct +import pwd +import time +import re + +f = open("/var/log/lastlog","rb") + +fmt = "L32s256s" +rec_size = struct.calcsize(fmt) + +for user in sys.argv[1:]: + if re.match(r"^\d+$", user): + user_id = int(user) + else: + try: + user_id = pwd.getpwnam(user)[2] + except: + print "no such uid %s" % (user) + continue + f.seek(rec_size * user_id) + bin = f.read(rec_size) + if len(bin) == rec_size: + data = struct.unpack(fmt, bin) + if data[0]: + logged_in = "at %s" % (time.strftime("%a %H:%M:%S %Y-%m-%d", + time.localtime(data[0]))) + line = " on %s" % (data[1]) + host = " from %s" % (data[2]) + else: + logged_in = "never logged in" + line = "" + host = "" + print "%-8s UID %5d %s%s%s" % (user, user_id, logged_in, line, host) + else: + print "Read failed." 
+f.close() + + +# ^^PLEAC^^_9.0 +#----------------------------- +entry = os.stat("/usr/bin/vi") +#----------------------------- +entry = os.stat("/usr/bin") +#----------------------------- +entry = os.stat(INFILE.name) +#----------------------------- +entry = os.stat("/usr/bin/vi") +ctime = entry.st_ino +size = entry.st_size +#----------------------------- +f = open(filename) + +f.seek(0, 2) +if not f.tell(): + raise SystemExit("%s doesn't have text in it."%filename) +#----------------------------- + +for filename in os.listdir("/usr/bin"): + print "Inside /usr/bin is something called", filename +#----------------------------- + +# ^^PLEAC^^_9.1 +#----------------------------- +fstat = os.stat(filename) +readtime = fstat.st_atime +writetime = fstat.st_mtime + +os.utime(filename, (newreadtime, newwritetime)) + +#DON'T DO THIS: +readtime, writetime = os.stat(filename)[7:9] +#----------------------------- +SECONDS_PER_DAY = 60 * 60 * 24 +fstat = os.stat(filename) +atime = fstat.st_atime - 7 * SECONDS_PER_DAY +mtime = fstat.st_mtime - 7 * SECONDS_PER_DAY + +os.utime(filename, (atime, mtime)) +#----------------------------- +mtime = os.stat(filename).st_mtime +utime(filename, (time.time(), mtime)) +#----------------------------- +#!/usr/bin/perl -w +# uvi - vi a file without changing its access times + +import sys, os +if len(sys.argv) != 2: + raise SystemExit("usage: uvi filename") +filename = argv[1] +fstat = os.stat(filename) +# WARNING: potential security risk +os.system( (os.environ.get("EDITOR") or "vi") + " " + filename) +os.utime(filename, (fstat.st_atime, fstat.st_mtime)) +#----------------------------- + +# ^^PLEAC^^_9.2 +#----------------------------- +os.remove(filename) + +err_flg = 0 +for filename in filenames: + try: + os.remove(filename) + except OSError, err: + err_flg = 1 +if err_flg: + raise OSError("Couldn't remove all of %s: %s" % (filenames, err)) +#----------------------------- +os.remove(filename) +#----------------------------- +success = 0 +for 
filename in filenames: + try: + os.remove(filename) + success += 1 + except OSError, err: + pass +if success != len(filenames): + sys.stderr.write("could only delete %d of %d files" % \ + (success, len(filenames))) + +#----------------------------- + +# ^^PLEAC^^_9.3 +#----------------------------- +import shutil +shutil.copy(oldfile, newfile) +#----------------------------- +## NOTE: this doesn't do the same thing as the Perl code, +## eg, handling of partial writes. +infile = open(oldfile) +outfile = open(newfile, "w") + +blksize = 16384 # preferred block size? + +while True: + buf = infile.read(blksize) + if not buf: + break + outfile.write(buf) + +infile.close() +outfile.close() +#----------------------------- +# WARNING: these are insecure - do not use in hostile environments +os.system("cp %s %s" % (oldfile, newfile)) # unix +os.system("copy %s %s" % (oldfile, newfile)) # dos, vms +#----------------------------- +import shutil + +shutil.copy("datafile.dat", "datafile.bak") + +shutil.copy("datafile.new", "datafile.dat") +os.remove("datafile.new") + +#----------------------------- + +# ^^PLEAC^^_9.4 +#----------------------------- +import os +seen = {} + +def do_my_thing(filename): + fstat = os.stat(filename) + key = (fstat.st_ino, fstat.st_dev) + if not seen.get(key): + # do something with filename because we haven't + # seen it before + pass + seen[key] = seen.get(key, 0 ) + 1 + +#----------------------------- +for filename in files: + fstat = os.stat(filename) + key = (fstat.st_ino, fstat.st_dev) + seen.setdefault(key, []).append(filename) + +keys = seen.keys() +keys.sort() +for inodev in keys: + ino, dev = inodev + filenames = seen[inodev] + if len(filenames) > 1: + # 'filenames' is a list of filenames for the same file + pass +#----------------------------- + +# ^^PLEAC^^_9.5 +#----------------------------- +for filename in os.listdir(dirname): + # do something with "$dirname/$file" + pass +#----------------------------- +# XXX No -T equivalent in Python 
#-----------------------------
# 'os.listdir' always skips '.' and '..' on OSes where those are
# standard directory names
for filename in os.listdir(dirname):
    pass
#-----------------------------
# XX Not Implemented -- need to know what DirHandle does
# use DirHandle;

#-----------------------------

# ^^PLEAC^^_9.6
#-----------------------------
import glob
filenames = glob.glob("*.c")
#-----------------------------
filenames = [filename for filename in os.listdir(path) if filename.endswith(".c")]
#-----------------------------
import re
allowed_name = re.compile(r"\.[ch]$", re.I).search
filenames = [f for f in os.listdir(path) if allowed_name(f)]
#-----------------------------
import re, os
allowed_name = re.compile(r"\.[ch]$", re.I).search

fnames = [os.path.join(dirname, fname)
          for fname in os.listdir(dirname)
          if allowed_name(fname)]
#-----------------------------
dirs = [os.path.join(path, f)
        for f in os.listdir(path) if f.isdigit()]
dirs = [d for d in dirs if os.path.isdir(d)]
# Sort by numeric value - "9" before "11".  The entries are joined paths
# by now, so only the final all-digit component can be handed to int().
dirs = sorted(dirs, key=lambda d: int(os.path.basename(d)))
#-----------------------------

# @@PLEAC@@_9.7
# Processing All Files in a Directory Recursively

# os.walk is new in 2.3.

# For pre-2.3 code, there is os.path.walk, which is
# a little harder to use.
+ +#----------------------------- +import os +for root, dirs, files in os.walk(top): + pass # do whatever + +#----------------------------- +import os, os.path +for root, dirs, files in os.walk(top): + for name in dirs: + print os.path.join(root, name) + '/' + for name in files: + print os.path.join(root, name) + +#----------------------------- +import os, os.path +numbytes = 0 +for root, dirs, files in os.walk(top): + for name in files: + path = os.path.join(root, name) + numbytes += os.path.getsize(path) +print "%s contains %s bytes" % (top, numbytes) + +#----------------------------- +import os, os.path +saved_size, saved_name = -1, '' +for root, dirs, files in os.walk(top): + for name in files: + path = os.path.join(root, name) + size = os.path.getsize(path) + if size > saved_size: + saved_size = size + saved_name = path +print "Biggest file %s in %s is %s bytes long" % ( + saved_name, top, saved_size) + +#----------------------------- +import os, os.path, time +saved_age, saved_name = None, '' +for root, dirs, files in os.walk(top): + for name in files: + path = os.path.join(root, name) + age = os.path.getmtime(path) + if saved_age is None or age > saved_age: + saved_age = age + saved_name = path +print "%s %s" % (saved_name, time.ctime(saved_age)) + +#----------------------------- +#!/usr/bin/env python +# fdirs - find all directories +import sys, os, os.path +argv = sys.argv[1:] or ['.'] +for top in argv: + for root, dirs, files in os.walk(top): + for name in dirs: + path = os.path.join(root, name) + print path + + +# ^^PLEAC^^_9.8 +#----------------------------- +# DeleteDir - remove whole directory trees like rm -r +import shutil +shutil.rmtree(path) + +# DON'T DO THIS: +import os, sys +def DeleteDir(dir): + for name in os.listdir(dir): + file = os.path.join(dir, name) + if not os.path.islink(file) and os.path.isdir(file): + DeleteDir(file) + else: + os.remove(file) + os.rmdir(dir) + +# @@PLEAC@@_9.9 +# Renaming Files + +# code sample one to one from my 
perlcookbook +# looks strange to me. +import os +for fname in fnames: + newname = fname + # change the file's name + try: + os.rename(fname, newname) + except OSError, err: + print "Couldn't rename %s to %s: %s!" % \ + (fname, newfile, err) + +# use os.renames if newname needs directory creation. + +#A vaguely Pythonic solution is: +import glob +def rename(files, transfunc) + for fname in fnames: + newname = transfunc(fname) + try: + os.rename(fname, newname) + except OSError, err: + print "Couldn't rename %s to %s: %s!" % \ + (fname, newfile, err) + +def transfunc(fname): + return fname[:-5] +rename(glob.glob("*.orig"), transfunc) + +def transfunc(fname): + return fname.lower() +rename([f for f in glob.glob("*") if not f.startswith("Make)], transfunc) + +def transfunc(fname): + return fname + ".bad" +rename(glob.glob("*.f"), transfunc) + +def transfunc(fname): + answer = raw_input(fname + ": ") + if answer.upper().startswith("Y"): + return fname.replace("foo", "bar") +rename(glob.glob("*"), transfunc) + +def transfunc(fname): + return ".#" + fname[:-1] +rename(glob.glob("/tmp/*~"), transfunc) + +# This _could_ be made to eval code taken directly from the command line, +# but it would be fragile +#----------------------------- + +# ^^PLEAC^^_9.10 +#----------------------------- +import os + +base = os.path.basename(path) +dirname = os.path.dirname(path) +dirname, filename = os.path.split(path) +base, ext = os.path.splitext(filename) + +#----------------------------- +path = '/usr/lib/libc.a' +filename = os.path.basename(path) +dirname = os.path.dirname(path) + +print "dir is %s, file is %s" % (dirname, filename) +# dir is /usr/lib, file is libc.a +#----------------------------- +path = '/usr/lib/libc.a' +dirname, filename = os.path.split(path) +name, ext = os.path.splitext(filename) + +print "dir is %s, name is %s, extension is %s" % (dirname, name, ext) +# NOTE: The Python code prints +# dir is /usr/lib, name is libc, extension is .a +# while the Perl code prints 
a '/' after the directory name +# dir is /usr/lib/, name is libc, extension is .a +#----------------------------- +import macpath +path = "Hard%20Drive:System%20Folder:README.txt" +dirname, base = macpath.split(path) +name, ext = macpath.splitext(base) + +print "dir is %s, name is %s, extension is %s" % (dirname, name, ext) +# dir is Hard%20Drive:System%20Folder, name is README, extension is .txt +#----------------------------- +# DON'T DO THIS - it's not portable. +def extension(path): + pos = path.find(".") + if pos == -1: + return "" + ext = path[pos+1:] + if "/" in ext: + # wasn't passed a basename -- this is of the form 'x.y/z' + return "" + return ext +#----------------------------- + +# @@PLEAC@@_9.11 + +#!/usr/bin/python +# sysmirror - build spectral forest of symlinks +import sys, os, os.path + +pgmname = sys.argv[0] +if len(sys.argv)!=3: + print "usage: %s realdir mirrordir" % pgmname + raise SystemExit + +(srcdir, dstdir) = sys.argv[1:3] +if not os.path.isdir(srcdir): + print "%s: %s is not a directory" % (pgmname,srcdir) + raise SystemExit +if not os.path.isdir(dstdir): + try: + os.mkdir(dstdir) + except OSError: + print "%s: can't make directory %s" % (pgmname,dstdir) + raise SystemExit + +# fix relative paths +srcdir = os.path.abspath(srcdir) +dstdir = os.path.abspath(dstdir) + +def wanted(arg, dirname, names): + for direntry in names: + relname = "%s/%s" % (dirname, direntry) + if os.path.isdir(relname): + mode = os.stat(relname).st_mode + try: + os.mkdir("%s/%s" % (dstdir,relname), mode) + except: + print "can't mkdir %s/%s" % (dstdir,relname) + raise SystemExit + else: + if relname[:2] == "./": + relname = relname[2:] + os.symlink("%s/%s" % (srcdir, relname), "%s/%s" % (dstdir,relname)) + +os.chdir(srcdir) +os.path.walk(".",wanted,None) + +# @@PLEAC@@_9.12 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + + +# ^^PLEAC^^_10.0 +#----------------------------- +# DO NOT DO THIS... 
+greeted = 0 +def hello(): + global greeted + greeted += 1 + print "hi there" + +#... as using a callable object to save state is cleaner +# class hello +# def __init__(self): +# self.greeted = 0 +# def __call__(self): +# self.greeted += 1 +# print "hi there" +# hello = hello() +#----------------------------- +hello() # call subroutine hello with no arguments/parameters +#----------------------------- + +# ^^PLEAC^^_10.1 +#----------------------------- +import math +# Provided for demonstration purposes only. Use math.hypot() instead. +def hypotenuse(side1, side2): + return math.sqrt(side1**2 + side2**2) + +diag = hypotenuse(3, 4) # diag is 5.0 +#----------------------------- +print hypotenuse(3, 4) # prints 5.0 + +a = (3, 4) +print hypotenuse(*a) # prints 5.0 +#----------------------------- +both = men + women +#----------------------------- +nums = [1.4, 3.5, 6.7] +# Provided for demonstration purposes only. Use: +# ints = [int(num) for num in nums] +def int_all(nums): + retlist = [] # make new list for return + for n in nums: + retlist.append(int(n)) + return retlist +ints = int_all(nums) # nums unchanged +#----------------------------- +nums = [1.4, 3.5, 6.7] + +def trunc_em(nums): + for i,elem in enumerate(nums): + nums[i] = int(elem) +trunc_em(nums) # nums now [1,3,6] + +#----------------------------- +# By convention, if a method (or function) modifies an object +# in-place, it returns None rather than the modified object. +# None of Python's built-in functions modify in-place; methods +# such as list.sort() are somewhat more common. +mylist = [3,2,1] +mylist = mylist.sort() # incorrect - returns None +mylist = sorted(mylist) # correct - returns sorted copy +mylist.sort() # correct - sorts in-place +#----------------------------- + +# ^^PLEAC^^_10.2 +#----------------------------- +# Using global variables is discouraged - by default variables +# are visible only at and below the scope at which they are declared. 
+# Global variables modified by a function or method must be declared +# using the "global" keyword if they are modified +def somefunc(): + variable = something # variable is invisible outside of somefunc +#----------------------------- +import sys +name, age = sys.args[1:] # assumes two and only two command line parameters +start = fetch_time() +#----------------------------- +a, b = pair +c = fetch_time() + +def check_x(x): + y = "whatever" + run_check() + if condition: + print "got", x +#----------------------------- +def save_list(*args): + Global_List.extend(args) +#----------------------------- + +# ^^PLEAC^^_10.3 +#----------------------------- +## Python allows static nesting of scopes for reading but not writing, +## preferring to use objects. The closest equivalent to: +#{ +# my $counter; +# sub next_counter { return ++$counter } +#} +## is: +def next_counter(counter=[0]): # default lists are created once only. + counter[0] += 1 + return counter[0] + +# As that's a little tricksy (and can't make more than one counter), +# many Pythonistas would prefer either: +def make_counter(): + counter = 0 + while True: + counter += 1 + yield counter +next_counter = make_counter().next + +# Or: +class Counter: + def __init__(self): + self.counter = 0 + def __call__(self): + self.counter += 1 + return self.counter +next_counter = Counter() + +#----------------------------- +## A close equivalent of +#BEGIN { +# my $counter = 42; +# sub next_counter { return ++$counter } +# sub prev_counter { return --$counter } +#} +## is to use a list (to save the counter) and closured functions: +def make_counter(start=0): + counter = [start] + def next_counter(): + counter[0] += 1 + return counter[0] + def prev_counter(): + counter[0] -= 1 + return counter[0] + return next_counter, prev_counter +next_counter, prev_counter = make_counter() + +## A clearer way uses a class: +class Counter: + def __init__(self, start=0): + self.value = start + def next(self): + self.value += 1 + return 
self.value + def prev(self): + self.value -= 1 + return self.value + def __int__(self): + return self.value + +counter = Counter(42) +next_counter = counter.next +prev_counter = counter.prev +#----------------------------- + +# ^^PLEAC^^_10.4 +## This sort of code inspection is liable to change as +## Python evolves. There may be cleaner ways to do this. +## This also may not work for code called from functions +## written in C. +#----------------------------- +import sys +this_function = sys._getframe(0).f_code.co_name +#----------------------------- +i = 0 # how far up the call stack to look +module = sys._getframe(i).f_globals["__name__"] +filename = sys._getframe(i).f_code.co_filename +line = sys._getframe(i).f_lineno +subr = sys._getframe(i).f_code.co_name +has_args = bool(sys._getframe(i+1).f_code.co_argcount) + +# 'wantarray' is Perl specific + +#----------------------------- +me = whoami() +him = whowasi() + +def whoami(): + sys._getframe(1).f_code.co_name +def whowasi(): + sys._getframe(2).f_code.co_name +#----------------------------- + +# ^^PLEAC^^_10.5 +#----------------------------- +# Every variable name is a reference to an object, thus nothing special +# needs to be done to pass a list or a dict as a parameter. +list_diff(list1, list2) +#----------------------------- +# Note: if one parameter to zip() is longer it will be truncated +def add_vecpair(x, y): + return [x1+y1 for x1, y1 in zip(x, y)] + +a = [1, 2] +b = [5, 8] +print " ".join([str(n) for n in add_vecpair(a, b)]) +#=> 6 10 +#----------------------------- +# DO NOT DO THIS: +assert isinstance(x, type([])) and isinstance(y, type([])), \ + "usage: add_vecpair(list1, list2)" +#----------------------------- + +# ^^PLEAC^^_10.6 +#----------------------------- +# perl return context is not something standard in python... +# but still you can achieve something alike if you really need it +# (but you must really need it badly since you should never use this!!) 
+# +# see http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/284742 for more +# +# NB: it has been tested under Python 2.3.x and no guarantees can be given +# that it works under any future Python version. +import inspect,dis + +def expecting(): + """Return how many values the caller is expecting""" + f = inspect.currentframe().f_back.f_back + bytecode = f.f_code.co_code + i = f.f_lasti + instruction = ord(bytecode[i+3]) + if instruction == dis.opmap['UNPACK_SEQUENCE']: + howmany = ord(bytecode[i+4]) + return howmany + elif instruction == dis.opmap['POP_TOP']: + return 0 + return 1 + +def cleverfunc(): + howmany = expecting() + if howmany == 0: + print "return value discarded" + if howmany == 2: + return 1,2 + elif howmany == 3: + return 1,2,3 + return 1 + +cleverfunc() +x = cleverfunc() +print x +x,y = cleverfunc() +print x,y +x,y,z = cleverfunc() +print x,y,z + +# ^^PLEAC^^_10.7 +#----------------------------- +thefunc(increment= "20s", start="+5m", finish="+30m") +thefunc(start= "+5m",finish="+30m") +thefunc(finish= "+30m") +thefunc(start="+5m", increment="15s") +#----------------------------- +def thefunc(increment='10s', + finish='0', + start='0'): + if increment.endswith("m"): + pass +#----------------------------- + +# ^^PLEAC^^_10.8 +#----------------------------- +a, _, c = func() # Use _ as a placeholder... +a, ignore, c = func() # ...or assign to an otherwise unused variable +#----------------------------- + +# ^^PLEAC^^_10.9 +#----------------------------- +def somefunc(): + mylist = [] + mydict = {} + # ... 
+ return mylist, mydict + +mylist, mydict = somefunc() +#----------------------------- +def fn(): + return a, b, c + +#----------------------------- +h0, h1, h2 = fn() +tuple_of_dicts = fn() # eg: tuple_of_dicts[2]["keystring"] +r0, r1, r2 = fn() # eg: r2["keystring"] + +#----------------------------- + +# ^^PLEAC^^_10.10 +#----------------------------- +# Note: Exceptions are almost always preferred to error values +return +#----------------------------- +def empty_retval(): + return None + +def empty_retval(): + return # identical to return None + +def empty_retval(): + pass # None returned by default (empty func needs pass) +#----------------------------- +a = yourfunc() +if a: + pass +#----------------------------- +a = sfunc() +if not a: + raise AssertionError("sfunc failed") + +assert sfunc(), "sfunc failed" +#----------------------------- + +# ^^PLEAC^^_10.11 +# Prototypes are inapplicable to Python as Python disallows calling +# functions without using brackets, and user functions are able to +# mimic built-in functions with no special actions required as they +# only flatten lists (and convert dicts to named arguments) if +# explicitly told to do so. 
Python functions use named parameters +# rather than shifting arguments: + +def myfunc(a, b, c=4): + print a, b, c + +mylist = [1,2] + +mydict1 = {"b": 2, "c": 3} +mydict2 = {"b": 2} + +myfunc(1,2,3) +#=> 1 2 3 + +myfunc(1,2) +#=> 1 2 4 + +myfunc(*mylist) +#=> 1 2 4 + +myfunc(5, *mylist) +#=> 5, 1, 2 + +myfunc(5, **mydict1) +#=> 5, 2, 3 + +myfunc(5, **mydict2) +#=> 5, 2, 4 + +myfunc(c=3, b=2, a=1) +#=> 1, 2, 3 + +myfunc(b=2, a=1) +#=> 1, 2, 4 + +myfunc(mylist, mydict1) +#=> [1, 2] {'c': 3, 'b': 2} 4 + +# For demonstration purposes only - don't do this +def mypush(mylist, *vals): + mylist.extend(vals) + +mylist = [] +mypush(mylist, 1, 2, 3, 4, 5) +print mylist +#=> [1, 2, 3, 4, 5] + +# ^^PLEAC^^_10.12 +#----------------------------- +raise ValueError("some message") # specific exception class +raise Exception("use me rarely") # general exception +raise "don't use me" # string exception (deprecated) +#----------------------------- +# Note that bare excepts are considered bad style. Normally you should +# trap specific exceptions. For instance these bare excepts will +# catch KeyboardInterrupt, SystemExit, and MemoryError as well as +# more common errors. In addition they force you to import sys to +# get the error message. 
+import warnings, sys +try: + func() +except: + warnings.warn("func raised an exception: " + str(sys.exc_info()[1])) +#----------------------------- +try: + func() +except: + warnings.warn("func blew up: " + str(sys.exc_info()[1])) +#----------------------------- +class MoonPhaseError(Exception): + def __init__(self, phase): + self.phase = phase +class FullMoonError(MoonPhaseError): + def __init__(self): + MoonPhaseError.__init__("full moon") + +def func(): + raise FullMoonError() + +# Ignore only FullMoonError exceptions +try: + func() +except FullMoonError: + pass +#----------------------------- +# Ignore only MoonPhaseError for a full moon +try: + func() +except MoonPhaseError, err: + if err.phase != "full moon": + raise +#----------------------------- + +# ^^PLEAC^^_10.13 +# There is no direct equivalent to 'local' in Python, and +# it's impossible to write your own. But then again, even in +# Perl it's considered poor style. + +# DON'T DO THIS (You probably shouldn't use global variables anyway): +class Local(object): + def __init__(self, globalname, val): + self.globalname = globalname + self.globalval = globals()[globalname] + globals()[globalname] = val + + def __del__(self): + globals()[self.globalname] = self.globalval + +foo = 4 + +def blah(): + print foo + +def blech(): + temp = Local("foo", 6) + blah() + +blah() +blech() +blah() + +#----------------------------- + +# ^^PLEAC^^_10.14 +#----------------------------- +grow = expand +grow() # calls expand() + +#----------------------------- +one.var = two.table # make one.var the same as two.table +one.big = two.small # make one.big the same as two.small +#----------------------------- +fred = barney # alias fred to barney +#----------------------------- +s = red("careful here") +print s +#> <FONT COLOR='red'>careful here</FONT> +#----------------------------- +# Note: the 'text' should be HTML escaped if it can contain +# any of the characters '<', '>' or '&' +def red(text): + return "<FONT COLOR='red'>" 
+ text + "</FONT>" +#----------------------------- +def color_font(color, text): + return "<FONT COLOR='%s'>%s</FONT>" % (color, text) + +def red(text): return color_font("red", text) +def green(text): return color_font("green", text) +def blue(text): return color_font("blue", text) +def purple(text): return color_font("purple", text) +# etc +#----------------------------- +# This is done in Python by making an object, instead of +# saving state in a local anonymous context. +class ColorFont: + def __init__(self, color): + self.color = color + def __call__(self, text): + return "<FONT COLOR='%s'>%s</FONT>" % (self.color, text) + +colors = "red blue green yellow orange purple violet".split(" ") +for name in colors: + globals()[name] = ColorFont(name) +#----------------------------- +# If you really don't want to make a new class, you can +# fake it somewhat by passing in default args. +colors = "red blue green yellow orange purple violet".split(" ") +for name in colors: + def temp(text, color = name): + return "<FONT COLOR='%s'>%s</FONT>" % (color, text) + globals()[name] = temp + +#----------------------------- + +# ^^PLEAC^^_10.15 + +# Python has the ability to derive from ModuleType and add +# new __getattr__ and __setattr__ methods. I don't know the +# expected way to use them to emulate Perl's AUTOLOAD. Instead, +# here's how something similar would be done in Python. This +# uses the ColorFont defined above. + +#----------------------------- +class AnyColor: + def __getattr__(self, name): + return ColorFont(name) + +colors = AnyColor() + +print colors.chartreuse("stuff") + +#----------------------------- +## Skipping this translation because 'local' is too Perl +## specific, and there isn't enough context to figure out +## what this is supposed to do. 
+#{ +# local *yellow = \&violet; +# local (*red, *green) = (\&green, \&red); +# print_stuff(); +#} +#----------------------------- + +# ^^PLEAC^^_10.16 +#----------------------------- +def outer(arg1): + x = arg1 + 35 + def inner(): + return x * 19 + return x + inner() +#----------------------------- + +# ^^PLEAC^^_10.17 +#----------------------------- +import mailbox, sys +mbox = mailbox.PortableUnixMailbox(sys.stdin) + +def extract_data(msg, idx): + subject = msg.getheader("Subject", "").strip() + if subject[:3].lower() == "re:": + subject = subject[3:].lstrip() + text = msg.fp.read() + return subject, idx, msg, text +messages = [extract_data(idx, msg) for idx, msg in enumerate(mbox)] + +#----------------------------- +# Sorts by subject then by original position in the list +for subject, pos, msg, text in sorted(messages): + print "%s\n%s"%(msg, text) + +#----------------------------- +# Sorts by subject then date then original position +def subject_date_position(elem): + return (elem[0], elem[2].getdate("Date"), elem[1]) +messages.sort(key=subject_date_position) + +# Pre 2.4: +messages = sorted(messages, key=subject_date_position) +#----------------------------- + +# @@PLEAC@@_11.0 +#Introduction. +# In Python, all names are references. +# All objects are inherently anonymous, they don't know what names refer to them. +print ref # prints the value that the name ref refers to. +ref = 3 # assigns the name ref to the value 3. +#----------------------------- +aref = mylist +#----------------------------- +aref = [3, 4, 5] # aref is a name for this list +href = {"How": "Now", "Brown": "Cow"} # href is a name for this dictionary +#----------------------------- +# Python doesn't have autovivification as (for simple types) there is no difference between a name and a reference. +# If we try the equivalent of the Perl code we get the list, not a reference to the list. 
+#----------------------------- +# To handle multidimensional arrays, you should use an extension to Python, +# such as numarray (http://www.stsci.edu/resources/software_hardware/numarray) +#----------------------------- +# In Python, assignment doesn't return anything. +#----------------------------- +Nat = { "Name": "Leonhard Euler", + "Address": "1729 Ramanujan Lane\nMathworld, PI 31416", + "Birthday": 0x5bb5580 +} +#----------------------------- + +# @@PLEAC@@_11.1 +aref = mylist +anon_list = [1, 3, 5, 7, 9] +anon_copy = anon_list +implicit_creation = [2, 4, 6, 8, 10] +#----------------------------- +anon_list.append(11) +#----------------------------- +two = implicit_creation[0] +#----------------------------- +# To get the last index of a list, you can use len() +# [or list.__len__() - but don't] directly +last_idx = len(aref) - 1 + +# Normally, though, you'd use an index of -1 for the last +# element, -2 for the second last, etc. +print implicit_creation[-1] +#=> 10 + +num_items = len(aref) +#----------------------------- +last_idx = aref.__len__() - 1 +num_items = aref.__len__() +#----------------------------- +if not isinstance(someVar, type([])): + print "Expected a list" +#----------------------------- +print list_ref +#----------------------------- +# sort is in place. +list_ref.sort() +#----------------------------- +list_ref.append(item) +#----------------------------- +def list_ref(): + return [] + +aref1 = list_ref() +aref2 = list_ref() +# aref1 and aref2 point to different lists. +#----------------------------- +list_ref[N] # refers to the Nth item in the list_ref list. +#----------------------------- +# The following two statements are equivalent and return up to 3 elements +# at indices 3, 4, and 5 (if they exist). +pie[3:6] +pie[3:6:1] +#----------------------------- +# This will insert 3 elements, overwriting elements at indices 3,4, or 5 - if they exist. 
+pie[3:6] = ["blackberry", "blueberry", "pumpkin"] +#----------------------------- +for item in pie: + print item + +# DON'T DO THIS (this type of indexing should be done with enumerate) +# xrange does not create a list 0..len(pie) - 1, it creates an object +# that returns one index at a time. +for idx in xrange(len(pie)): + print pie[idx] + +# @@PLEAC@@_11.2 +# Making Hashes of Arrays + +hash["KEYNAME"].append("new value") + +for mystr in hash.keys(): + print "%s: %s" % (mystr, hash[mystr]) + +hash["a key"] = [3, 4, 5] + +values = hash["a key"] + +hash["a key"].append(value) + +# autovivification also does not work in python. +residents = phone2name[number] +# do this instead +residents = phone2name.get(number, []) + + +# @@PLEAC@@_11.3 +# Taking References to Hashes + +href = hash +anon_hash = { "key1":"value1", "key2" : "value2 ..." } +anon_hash_copy = anon_hash.copy() + +hash = href +value = href[key] +slice = [href[k] for k in (key1, key2, key3)] +keys = hash.keys() + +import types +if type(someref) != types.DictType: + raise "Expected a dictionary, not %s" % type(someref) +if isinstance(someref,dict): + raise "Expected a dictionary, not %s" % type(someref) + +for href in ( ENV, INC ): + for key in href.keys(): + print "%s => %s" % (key, href[key]) + +values = [hash_ref[k] for k in (key1, key2, key3)] + +for key in ("key1", "key2", "key3"): + hash_ref[k] += 7 # not like in perl but the same result. +#----------------------------- + +# @@PLEAC@@_11.4 +#----------------------------- +cref = func +cref = lambda a, b: ... 
+#----------------------------- +returned = cref(arguments) +#----------------------------- +funcname = "thefunc" +locals()[funcname](); +#----------------------------- +commands = { + 'happy': joy, + 'sad': sullen, + 'done': (lambda : sys.exit()), # In this case "done: sys.exit" would suffice + 'mad': angry, + } + +print "How are you?", +cmd = raw_input() +if cmd in commands: + commands[cmd]() +else: + print "No such command: %s" % cmd +#----------------------------- +def counter_maker(): + start = [0] + def counter_function(): + # start refers to the variable defined in counter_maker, but + # we can't reassign or increment variables in parent scopes. + # By using a one-element list we can modify the list without + # reassigning the variable. This way of using a list is very + # like a reference. + start[0] += 1 + return start[0]-1 + return counter_function + +counter = counter_maker() +for i in range(5): + print counter() +#----------------------------- +counter1 = counter_maker() +counter2 = counter_maker() + +for i in range(5): + print counter1() +print counter1(), counter2() +#=> 0 +#=> 1 +#=> 2 +#=> 3 +#=> 4 +#=> 5 0 +#----------------------------- +import time +def timestamp(): + start_time = time.time() + def elapsed(): + return time.time() - start_time + return elapsed +early = timestamp() +time.sleep(20) +later = timestamp() +time.sleep(10) +print "It's been %d seconds since early" % early() +print "It's been %d seconds since later" % later() +#=> It's been 30 seconds since early. +#=> It's been 10 seconds since later. +#----------------------------- + +# @@PLEAC@@_11.5 +# A name is a reference to an object and an object can be referred to +# by any number of names. There is no way to manipulate pointers or +# an object's id. This section is thus inapplicable. 
+x = 1 +y = x +print x, id(x), y, id(y) +x += 1 # "x" now refers to a different object than y +print x, id(x), y, id(y) +y = 4 # "y" now refers to a different object than it did before +print x, id(x), y, id(y) + +# Some objects (including ints and strings) are immutable, however, which +# can give the illusion of a by-value/by-reference distinction: +a = x = [1] +b = y = 1 +c = z = "s" +print a, b, c +#=> [1] 1 s + +x += x # calls list.__iadd__ which is inplace. +y += y # can't find int.__iadd__ so calls int.__add__ which isn't inplace +z += z # can't find str.__iadd__ so calls str.__add__ which isn't inplace +print a, b, c +#=> [1, 1] 1 s + +# @@PLEAC@@_11.6 +# As indicated by the previous section, everything is referenced, so +# just create a list as normal, and beware that augmented assignment +# works differently with immutable objects to mutable ones: +mylist = [1, "s", [1]] +print mylist +#=> [1, s, [1]] + +for elem in mylist: + elem *= 2 +print mylist +#=> [1, s, [1, 1]] + +mylist[0] *= 2 +mylist[-1] *= 2 +print mylist +#=> [1, s, [1, 1, 1, 1]] + +# If you need to modify every value in a list, you should use a list comprehension +# which does NOT modify inplace: +import math +mylist = [(val**3 * 4/3*math.pi) for val in mylist] + +# @@PLEAC@@_11.7 +#----------------------------- +c1 = mkcounter(20) +c2 = mkcounter(77) + +print "next c1: %d" % c1['next']() # 21 +print "next c2: %d" % c2['next']() # 78 +print "next c1: %d" % c1['next']() # 22 +print "last c1: %d" % c1['prev']() # 21 +print "old c2: %d" % c2['reset']() # 77 +#----------------------------- +# DON'T DO THIS. 
Use an object instead +def mkcounter(start): + count = [start] + def next(): + count[0] += 1 + return count[0] + def prev(): + count[0] -= 1 + return count[0] + def get(): + return count[0] + def set(value): + count[0] = value + return count[0] + def bump(incr): + count[0] += incr + return count[0] + def reset(): + count[0] = start + return count[0] + return { + 'next': next, 'prev': prev, 'get': get, 'set': set, + 'bump': bump, 'reset': reset, 'last': prev} +#----------------------------- + +# @@PLEAC@@_11.8 +#----------------------------- +mref = obj.meth +# later... +mref("args", "go", "here") +#----------------------------- + +# @@PLEAC@@_11.9 +#----------------------------- +record = { + "name": "Jason", + "empno": 132, + "title": "deputy peon", + "age": 23, + "salary": 37000, + "pals": ["Norbert", "Rhys", "Phineas"], +} +print "I am %s, and my pals are %s." % (record["name"], + ", ".join(record["pals"])) +#----------------------------- +byname = {} +byname[record["name"]] = record + +rp = byname.get("Aron") +if rp: + print "Aron is employee %d."% rp["empno"] + +byname["Jason"]["pals"].append("Theodore") +print "Jason now has %d pals." % len(byname["Jason"]["pals"]) + +for name, record in byname.items(): + print "%s is employee number %d." % (name, record["empno"]) + +employees = {} +employees[record["empno"]] = record; + +# lookup by id +rp = employees.get(132) +if (rp): + print "Employee number 132 is %s." 
% rp["name"] + +byname["Jason"]["salary"] *= 1.035 + +peons = [r for r in employees.values() if r["title"] == "peon"] +tsevens = [r for r in employees.values() if r["age"] == 27] + +# Go through all records +print employees.values() + +for rp in sorted(employees.values(), key=lambda x:x["age"]): + print "%s is age %d."%(rp["name"], rp["age"]) + +# use @byage, an array of arrays of records +byage = {} +byage[record["age"]] = byage.get(record["age"], []) + [record] + +for age, records in byage.items(): + print records + print "Age %s:"%age, + for rp in records: + print rp["name"], + print +#----------------------------- + +# @@PLEAC@@_11.10 +#----------------------------- +FieldName: Value +#----------------------------- +for record in list_of_records: + # Note: sorted added in Python 2.4 + for key in sorted(record.keys()): + print "%s: %s" % (key, record[key]) + print +#----------------------------- +import re +list_of_records = [{}] +while True: + line = sys.stdin.readline() + if not line: + # EOF + break + # Remove trailing \n: + line = line[:1] + if not line.strip(): + # New record + list_of_records.append({}) + continue + key, value = re.split(r':\s*', line, 1) + # Assign the key/value to the last item in the list_of_records: + list_of_records[-1][key] = value +#----------------------------- +# @@PLEAC@@_11.11 +import pprint + +mylist = [[1,2,3], [4, [5,6,7], 8,9, [0,3,5]], 7, 8] +mydict = {"abc": "def", "ghi":[1,2,3]} +pprint.pprint(mylist, width=1) + +fmtdict = pprint.pformat(mydict, width=1) +print fmtdict +# "import pprint; help(pprint)" for more details + +# @@INCOMPLETE@@ +# Note that pprint does not currently handle user objects + +#----------------------------- +# @@PLEAC@@_11.12 +newlist = list(mylist) # shallow copy +newdict = dict(mydict) # shallow copy + +# Pre 2.3: +import copy +newlist = copy.copy(mylist) # shallow copy +newdict = copy.copy(mydict) # shallow copy + +# shallow copies copy a data structure, but don't copy the items in those +# data 
structures so if there are nested data structures, both copy and +# original will refer to the same object +mylist = ["1", "2", "3"] +newlist = list(mylist) +mylist[0] = "0" +print mylist, newlist +#=> ['0', '2', '3'] ['1', '2', '3'] + +mylist = [["1", "2", "3"], 4] +newlist = list(mylist) +mylist[0][0] = "0" +print mylist, newlist +#=> [['0', '2', '3'], 4] [['0', '2', '3'], 4] +#----------------------------- +import copy +newlist = copy.deepcopy(mylist) # deep copy +newdict = copy.deepcopy(mydict) # deep copy + +# deep copies copy a data structure recursively: +import copy + +mylist = [["1", "2", "3"], 4] +newlist = copy.deepcopy(mylist) +mylist[0][0] = "0" +print mylist, newlist +#=> [['0', '2', '3'], 4] [['1', '2', '3'], 4] +#----------------------------- +# @@PLEAC@@_11.13 +import pickle +class Foo(object): + def __init__(self): + self.val = 1 + +x = Foo() +x.val = 3 +p_x = pickle.dumps(x) # Also pickle.dump(x, myfile) which writes to myfile +del x +x = pickle.loads(p_x) # Also x = pickle.load(myfile) which loads from myfile +print x.val +#=> 3 +#----------------------------- +# @@PLEAC@@_11.14 +import os, shelve +fname = "testfile.db" +if not os.path.exists(fname): + d = shelve.open("testfile.db") + for i in range(100000): + d[str(i)] = i + d.close() + +d = shelve.open("testfile.db") +print d["100"] +print d["1212010201"] # KeyError +#----------------------------- + +# @@PLEAC@@_11.15 +# bintree - binary tree demo program +# Use the heapq module instead? +import random +import warnings + +class BTree(object): + def __init__(self): + self.value = None + + ### insert given value into proper point of + ### the tree, extending this node if necessary. 
+ def insert(self, value): + if self.value is None: + self.left = BTree() + self.right = BTree() + self.value = value + elif self.value > value: + self.left.insert(value) + elif self.value < value: + self.right.insert(value) + else: + warnings.warn("Duplicate insertion of %s."%value) + + # recurse on left child, + # then show current value, + # then recurse on right child. + def in_order(self): + if self.value is not None: + self.left.in_order() + print self.value, + self.right.in_order() + + # show current value, + # then recurse on left child, + # then recurse on right child. + def pre_order(self): + if self.value is not None: + print self.value, + self.left.pre_order() + self.right.pre_order() + + # recurse on left child, + # then recurse on right child, + # then show current value. + def post_order(self): + if self.value is not None: + self.left.post_order() + self.right.post_order() + print self.value, + + # find out whether provided value is in the tree. + # if so, return the node at which the value was found. + # cut down search time by only looking in the correct + # branch, based on current value. + def search(self, value): + if self.value is not None: + if self.value == value: + return self + if value < self.value: + return self.left.search(value) + else: + return self.right.search(value) + +def test(): + root = BTree() + + for i in range(20): + root.insert(random.randint(1, 1000)) + + # now dump out the tree all three ways + print "Pre order: ", root.pre_order() + print "In order: ", root.in_order() + print "Post order:", root.post_order() + + ### prompt until empty line + while True: + val = raw_input("Search? 
").strip() + if not val: + break + val = int(val) + found = root.search(val) + if found: + print "Found %s at %s, %s"%(val, found, found.value) + else: + print "No %s in tree" % val + +if __name__ == "__main__": + test() + + +# ^^PLEAC^^_12.0 +#----------------------------- +## Python's "module" is the closest equivalent to Perl's "package" + + +#=== In the file "Alpha.py" +name = "first" + +#=== End of file + +#=== In the file "Omega.py" + +name = "last" +#=== End of file + +import Alpha, Omega +print "Alpha is %s, Omega is %s." % (Alpha.name, Omega.name) +#> Alpha is first, Omega is last. +#----------------------------- +# Python does not have an equivalent to "compile-time load" +import sys + +# Depending on the implementation, this could use a builtin +# module or load a file with the extension .py, .pyc, pyo, .pyd, +# .so, .dll, or (with imputils) load from other files. +import Cards.Poker + +#----------------------------- +#=== In the file Cards/Poker.py +__all__ = ["card_deck", "shuffle"] # not usually needed +card_deck = [] +def shuffle(): + pass + +#----------------------------- + +# ^^PLEAC^^_12.1 +#----------------------------- +#== In the file "YourModule.py" + +__version__ = (1, 0) # Or higher +__all__ = ["...", "..."] # Override names included in "... import *" + # Note: 'import *' is considered poor style + # and it is rare to use this variable. +######################## +# your code goes here +######################## + +#----------------------------- +import YourModule # Import the module into my package + # (does not import any of its symbols) + +import YourModule as Module # Use a different name for the module + +from YourModule import * # Import all module symbols not starting + # with an underscore (default); if __all__ + # is defined, only imports those symbols. + # Using this is discouraged unless the + # module is specifically designed for it. 
+ +from YourModule import name1, name2, xxx + # Import the named symbols from the module + +from YourModule import name1 as name2 + # Import the named object, but use a + # different name to access it locally. + +#----------------------------- +__all__ = ["F1", "F2", "List"] +#----------------------------- +__all__ = ["Op_Func", "Table"] +#----------------------------- +from YourModule import Op_Func, Table, F1 +#----------------------------- +from YourModule import Functions, Table +#----------------------------- + +# ^^PLEAC^^_12.2 +#----------------------------- +# no import +mod = "module" +try: + __import__(mod) +except ImportError, err: + raise ImportError("couldn't load %s: %s" % (mod, err)) + +# imports into current package +try: + import module +except ImportError, err: + raise ImportError("couldn't load 'module': %s" % (err, )) + +# imports into current package, if the name is known +try: + import module +except ImportError, err: + raise ImportError("couldn't load 'module': %s" % (err, )) + +# Use a fixed local name for a named module +mod = "module" +try: + local_name = __import__(mod) +except ImportError, err: + raise ImportError("couldn't load %s: %s" % (mod, err)) + +# Use the given name for the named module. +# (You probably don't need to do this.) +mod = "module" +try: + globals()[mod] = __import__(mod) +except ImportError, err: + raise ImportError("couldn't load %s: %s" % (mod, err)) + +#----------------------------- +DBs = "Giant.Eenie Giant.Meanie Mouse.Mynie Moe".split() +for mod in DBs.split(): + try: + loaded_module = __import__(mod) + except ImportError: + continue + # __import__ returns a reference to the top-most module + # Need to get the actual submodule requested. 
+ for term in mod.split(".")[:-1]: + loaded_module = getattr(loaded_module, term) + break +else: + raise ImportError("None of %s loaded" % DBs) +#----------------------------- + +# ^^PLEAC^^_12.3 +#----------------------------- +import sys +if __name__ == "__main__": + if len(sys.argv) != 3 or not sys.argv[1].isdigit() \ + or not sys.argv[2].isdigit(): + raise SystemExit("usage: %s num1 num2" % sys.argv[0]) + +import Some.Module +import More.Modules +#----------------------------- +if opt_b: + import math +#----------------------------- +from os import O_EXCL, O_CREAT, O_RDWR + +#----------------------------- +import os +O_EXCL = os.O_EXCL +O_CREAT = os.O_CREAT +O_RDWR = os.O_RDWR +#----------------------------- +import os +O_EXCL, O_CREAT, O_RDWR = os.O_EXCL, os.O_CREAT, os.O_RDWR +#----------------------------- +load_module('os', "O_EXCL O_CREAT O_RDWR".split()) + +def load_module(module_name, symbols): + module = __import__(module_name) + for symbol in symbols: + globals()[symbol] = getattr(module, symbol) +#----------------------------- + +# ^^PLEAC^^_12.4 +#----------------------------- +# Python doesn't have Perl-style packages + +# Flipper.py +__version__ = (1, 0) + +__all__ = ["flip_boundary", "flip_words"] + +Separatrix = ' ' # default to blank + +def flip_boundary(sep = None): + prev_sep = Separatrix + if sep is not None: + global Separatrix + Separatrix = sep + return prev_sep + +def flip_words(line): + words = line.split(Separatrix) + words.reverse() + return Separatrix.join(words) +#----------------------------- + +# ^^PLEAC^^_12.5 +#----------------------------- +this_pack = __name__ +#----------------------------- +that_pack = sys._getframe(1).f_globals.get("__name__", "<string>") +#----------------------------- +print "I am in package", __name__ +#----------------------------- +def nreadline(count, myfile): + if count <= 0: + raise ValueError("Count must be > 0") + return [myfile.readline() for i in range(count)] + +def main(): + myfile = 
open("/etc/termcap") + a, b, c = nreadline(3, myfile) + myfile.close() + +if __name__ == "__main__": + main() + +# DON'T DO THIS: +import sys + +def nreadline(count, handle_name): + assert count > 0, "count must be > 0" + locals = sys._getframe(1).f_locals + if not locals.has_key(handle_name): + raise AssertionError("need open filehandle") + infile = locals[handle_name] + retlist = [] + for line in infile: + retlist.append(line) + count -= 1 + if count == 0: + break + return retlist + +def main(): + FH = open("/etc/termcap") + a, b, c = nreadline(3, "FH") + +if __name__ == "__main__": + main() +#----------------------------- + +# ^^PLEAC^^_12.6 +#----------------------------- +## There is no direct equivalent in Python to an END block +import time, os, sys + +# Tricks to ensure the needed functions exist during module cleanup +def _getgmtime(asctime=time.asctime, gmtime=time.gmtime, + t=time.time): + return asctime(gmtime(t())) + +class Logfile: + def __init__(self, file): + self.file = file + + def _logmsg(self, msg, argv0=sys.argv[0], pid=os.getpid(), + _getgmtime=_getgmtime): + # more tricks to keep all needed references + now = _getgmtime() + print>>self.file, argv0, pid, now + ":", msg + + def logmsg(self, msg): + self._logmsg(self.file, msg) + + def __del__(self): + self._logmsg("shutdown") + self.file.close() + + def __getattr__(self, attr): + # forward everything else to the file handle + return getattr(self.file, attr) + +# 0 means unbuffered +LF = Logfile(open("/tmp/mylog", "a+", 0)) +logmsg = LF.logmsg + +#----------------------------- +## It is more appropriate to use try/finally around the +## main code, so the order of initialization and finalization +## can be specified. 
+if __name__ == "__main__": + import logger + logger.init("/tmp/mylog") + try: + main() + finally: + logger.close() + +#----------------------------- + +# ^^PLEAC^^_12.7 +#----------------------------- +#% python -c 'import sys\ +for i, name in zip(xrange(sys.maxint), sys.path):\ + print i, repr(name) +#> 0 '' +#> 1 '/usr/lib/python2.2' +#> 2 '/usr/lib/python2.2/plat-linux2' +#> 3 '/usr/lib/python2.2/lib-tk' +#----------------------------- +# syntax for sh, bash, ksh, or zsh +#$ export PYTHONPATH=$HOME/pythonlib + +# syntax for csh or tcsh +#% setenv PYTHONPATH ~/pythonlib +#----------------------------- +import sys +sys.path.insert(0, "/projects/spectre/lib") +#----------------------------- +import FindBin +sys.path.insert(0, FindBin.Bin) +#----------------------------- +import FindBin +Bin = "Name" +bin = getattr(FindBin, Bin) +sys.path.insert(0, bin + "/../lib") +#----------------------------- + +# ^^PLEAC^^_12.8 +#----------------------------- +#% h2xs -XA -n Planets +#% h2xs -XA -n Astronomy::Orbits +#----------------------------- +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ +# Need a distutils example +#----------------------------- + +# ^^PLEAC^^_12.9 +#----------------------------- +# Python compiles a file to bytecode the first time it is imported and +# stores this compiled form in a .pyc file. There is thus less need for +# incremental compilation as once there is a .pyc file, the sourcecode +# is only recompiled if it is modified. + +# ^^PLEAC^^_12.10 +#----------------------------- +# See previous section + +# ^^PLEAC^^_12.11 +#----------------------------- +## Any definition in a Python module overrides the builtin +## for that module + +#=== In MyModule +def open(): + pass # TBA +#----------------------------- +from MyModule import open +file = open() +#----------------------------- + +# ^^PLEAC^^_12.12 +#----------------------------- +def even_only(n): + if n & 1: # one way to test + raise AssertionError("%s is not even" % (n,)) + #.... 
+ +#----------------------------- +def even_only(n): + if n % 2: # here's another + # choice of exception depends on the problem + raise TypeError("%s is not even" % (n,)) + #.... + +#----------------------------- +import warnings +def even_only(n): + if n & 1: # test whether odd number + warnings.warn("%s is not even, continuing" % (n)) + n += 1 + #.... +#----------------------------- +warnings.filterwarnings("ignore") +#----------------------------- + +# ^^PLEAC^^_12.13 +#----------------------------- +val = getattr(__import__(packname), varname) +vals = getattr(__import__(packname), aryname) +getattr(__import__(packname), funcname)("args") + +#----------------------------- +# DON'T DO THIS [Use math.log(val, base) instead] +import math +def make_log(n): + def logn(val): + return math.log(val, n) + return logn + +# Modifying the global dictionary - this could also be done +# using locals(), or someobject.__dict__ +globaldict = globals() +for i in range(2, 1000): + globaldict["log%s"%i] = make_log(i) + +# DON'T DO THIS +for i in range(2,1000): + exec "log%s = make_log(i)"%i in globals() + +print log20(400) +#=>2.0 +#----------------------------- +blue = colours.blue +someobject.blue = colours.azure # someobject could be a module... +#----------------------------- + +# ^^PLEAC^^_12.14 +#----------------------------- +# Python extension modules can be imported and used just like +# a pure python module. +# +# See http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/ for +# information on how to create extension modules in Pyrex [a +# language that's basically Python with type definitions which +# converts to compiled C code] +# +# See http://www.boost.org/libs/python/doc/ for information on how +# to create extension modules in C++. 
+# +# See http://www.swig.org/Doc1.3/Python.html for information on how +# to create extension modules in C/C++ +# +# See http://docs.python.org/ext/ext.html for information on how to +# create extension modules in C/C++ (manual reference count management). +# +# See http://cens.ioc.ee/projects/f2py2e/ for information on how to +# create extension modules in Fortran +# +# See http://www.scipy.org/Weave for information on how to +# include inline C code in Python code. +# +# @@INCOMPLETE@@ Need examples of FineTime extensions using the different methods... +#----------------------------- + +# ^^PLEAC^^_12.15 +#----------------------------- +# See previous section +#----------------------------- + +# ^^PLEAC^^_12.16 +#----------------------------- +# To document code, use docstrings. A docstring is a bare string that +# is placed at the beginning of a module or immediately after the +# definition line of a class, method, or function. Normally, the +# first line is a brief description of the object; if a longer +# description is needed, it commences on the third line (the second +# line being left blank). Multiline comments should use triple +# quoted strings. +# +# Docstrings are automagically assigned to an object's __doc__ property. +# +# In other words these three classes are identical: +class Foo(object): + "A class demonstrating docstrings." + +class Foo(object): + __doc__ = "A class demonstrating docstrings." + +class Foo(object): + pass +Foo.__doc__ = "A class demonstrating docstrings." + +# as are these two functions: +def foo(): + "A function demonstrating docstrings." + +def foo(): + pass +foo.__doc__ = "A function demonstrating docstrings." 
+ +# the pydoc module is used to display a range of information about +# an object including its docstrings: +import pydoc +print pydoc.getdoc(int) +pydoc.help(int) + +# In the interactive interpreter, objects' documentation can be +# using the help function: +help(int) + +#----------------------------- + +# ^^PLEAC^^_12.17 +#----------------------------- +# Recent Python distributions are built and installed with disutils. +# +# To build and install under unix +# +# % python setup.py install +# +# If you want to build under one login and install under another +# +# % python setup.py build +# $ python setup.py install +# +# A package may also be available prebuilt, eg, as an RPM or Windows +# installer. Details will be specific to the operating system. + +#----------------------------- +# % python setup.py --prefix ~/python-lib +#----------------------------- + + +# ^^PLEAC^^_12.18 +#----------------------------- +#== File Some/Module.py + +# There are so many differences between Python and Perl that +# it isn't worthwhile trying to come up with an equivalent to +# this Perl code. The Python code is much smaller, and there's +# no need to have a template. + +#----------------------------- + +# ^^PLEAC^^_12.19 +#----------------------------- +#% pmdesc +#----------------------------- +import sys, pydoc + +def print_module_info(path, modname, desc): + # Skip files starting with "test_" + if modname.split(".")[-1].startswith("test_"): + return + try: + # This assumes the modules are safe for importing, + # in that they don't have side effects. Could also + # grep the file for the __version__ line. 
+ mod = pydoc.safeimport(modname) + except pydoc.ErrorDuringImport: + return + version = getattr(mod, "__version__", "unknown") + if isinstance(version, type("")): + # Use the string if it's given + pass + else: + # Assume it's a list of version numbers, from major to minor + ".".join(map(str, version)) + synopsis, text = pydoc.splitdoc(desc) + print "%s (%s) - %s" % (modname, version, synopsis) + +scanner = pydoc.ModuleScanner() +scanner.run(print_module_info) + +#----------------------------- + + +# ^^PLEAC^^_13.0 +#----------------------------- +# Inside a module named 'Data' / file named 'Data.py' +class Encoder(object): + pass +#----------------------------- +obj = [3, 5] +print type(obj), id(obj), ob[1] + +## Changing the class of builtin types is not supported +## in Python. + +#----------------------------- +obj.Stomach = "Empty" # directly accessing an object's contents +obj.NAME = "Thag" # uppercase field name to make it stand out +(optional) +#----------------------------- +encoded = object.encode("data") +#----------------------------- +encoded = Data.Encoder.encode("data") +#----------------------------- +class Class(object): + def __init__(self): + pass +#----------------------------- +object = Class() +#----------------------------- +class Class(object): + def class_only_method(): + pass # more code here + class_only_method = staticmethod(class_only_method) + +#----------------------------- +class Class(object): + def instance_only_method(self): + pass # more code here +#----------------------------- +lector = Human.Cannibal() +lector.feed("Zak") +lector.move("New York") +#----------------------------- +# NOTE: it is rare to use these forms except inside of +# methods to call specific methods from a parent class +lector = Human.Cannibal() +Human.Cannibal.feed(lector, "Zak") +Human.Cannibal.move(lector, "New York") +#----------------------------- +print>>sys.stderr, "stuff here\n" + +# ^^PLEAC^^_13.1 +#----------------------------- +class 
Class(object): + pass +#----------------------------- +import time +class Class(object): + def __init__(self): + self.start = time.time() # init data fields + self.age = 0 +#----------------------------- +import time +class Class(object): + def __init__(self, **kwargs): + # Sets self.start to the current time, and self.age to 0. If called + # with arguments, interpret them as key+value pairs to + # initialize the object with + self.age = 0 + self.__dict__.update(kwargs) +#----------------------------- + +# ^^PLEAC^^_13.2 +#----------------------------- +import time +class Class(object): + def __del__(self): + print self, "dying at", time.ctime() +#----------------------------- +## Why is the perl code introducing a cycle? I guess it's an +## example of how to keep from calling the finalizer +self.WHATEVER = self +#----------------------------- + +# ^^PLEAC^^_13.3 +#----------------------------- +# It is standard practice to access attributes directly: +class MyClass(object) + def __init__(self): + self.name = "default" + self.age = 0 +obj = MyClass() +obj.name = "bob" +print obj.name +obj.age += 1 + +# If you later find that you need to compute an attribute, you can always +# retrofit a property(), leaving user code untouched: +class MyClass(object): + def __init__(self): + self._name = "default" + self._age = 0 + + def get_name(self): + return self._name + def set_name(self, name): + self._name = name.title() + name = property(get_name, set_name) + + def get_age(self): + return self._age + def set_age(self, val): + if val < 0: + raise ValueError("Invalid age: %s" % val) + self._age = val + age = property(get_age, set_age) +obj = MyClass() +obj.name = "bob" +print obj.name +obj.age += 1 + +# DON'T DO THIS - explicit getters and setters should not be used: +class MyClass(object): + def __init__(self): + self.name = "default" + def get_name(self): + return self.name + def set_name(self, name): + self.name = name.title() +obj = MyClass() +obj.set_name("bob") +print 
obj.get_name() +#----------------------------- +## DON'T DO THIS (It's complex, ugly, and unnecessary): +class MyClass(object): + def __init__(self): + self.age = 0 + def name(self, *args): + if len(args) == 0: + return self.name + elif len(args) == 1: + self.name = args[0] + else: + raise TypeError("name only takes 0 or 1 arguments") + def age(self, *args): + prev = self.age + if args: + self.age = args[0] + return prev + +# sample call of get and set: happy birthday! +obj.age(1 + obj.age()) + +#----------------------------- +him = Person() +him.NAME = "Sylvester" +him.AGE = 23 +#----------------------------- +# Here's another way to implement the 'obj.method()' is a getter +# and 'obj.method(value)' is a settor. Again, this is not a +# common Python idiom and should not be used. See below for a +# more common way to do parameter checking of attribute assignment. + +import re, sys + +def carp(s): + sys.stderr.write("WARNING: " + s + "\n") + +class Class: + no_name = [] + + def name(self, value = no_name): + if value is Class.no_name: + return self.NAME + value = self._enforce_name_value(value) + self.NAME = value + + def _enforce_name_value(self, value): + if re.search(r"[^\s\w'-]", value): + carp("funny characters in name") + if re.search(r"\d", value): + carp("numbers in name") + if not re.search(r"\S+(\s+\S+)+", value): + carp("prefer multiword name") + if not re.search(r"\S", value): + carp("name is blank") + return value.upper() # enforce capitalization +#----------------------------- +# A more typical way to enforce restrictions on a value +# to set +class Class: + def __setattr__(self, name, value): + if name == "name": + value = self._enforce_name_value(value) # Do any conversions + self.__dict__[name] = value # Do the default __setattr__ action + + def _enforce_name_value(self, value): + if re.search(r"[^\s\w'-]", value): + carp("funny characters in name") + if re.search(r"\d", value): + carp("numbers in name") + if not re.search(r"\S+(\s+\S+)+", value): 
+ carp("prefer multiword name") + if not re.search(r"\S", value): + carp("name is blank") + return value.upper() # enforce capitalization + +#----------------------------- +class Person: + def __init__(self, name = None, age = None, peers = None): + if peers is None: peers = [] # See Python FAQ 6.25 + self.name = name + self.age = age + self.peers = peers + + def exclaim(self): + return "Hi, I'm %s, age %d, working with %s" % \ + (self.name, self.age, ", ".join(self.peers)) + + def happy_birthday(self): + self.age += 1 + return self.age +#----------------------------- + +# ^^PLEAC^^_13.4 +#----------------------------- +## In the module named 'Person' ... +def population(): + return Person.body_count[0] + +class Person(object): + body_count = [0] # class variable - shared across all instances + + def __init__(self): + self.body_count[0] += 1 + + def __del__(self): # Beware - may be non-deterministic (Jython)! + self.body_count[0] -= 1 + +# later, the user can say this: +import Person +people = [] +for i in range(10): + people.append(Person.Person()) +print "There are", Person.population(), "people alive." + +#=> There are 10 people alive. 
+#----------------------------- +him = Person() +him.gender = "male" + +her = Person() +her.gender = "female" + +#----------------------------- +FixedArray.max_bounds = 100 # set for whole class +alpha = FixedArray.FixedArray() +print "Bound on alpha is", alpha.max_bounds +#=>100 + +beta = FixedArray.FixedArray() +beta.max_bounds = 50 # still sets for whole class +print "Bound on alpha is", alpha.max_bounds +#=>50 +#----------------------------- +# In the module named 'FixedArray' + +class FixedArray(object): + _max_bounds = [7] # Shared across whole class + + def __init__(self, bounds=None): + if bounds is not None: + self.max_bounds = bounds + + def get_max_bounds(self): + return self._max_bounds[0] + def set_max_bounds(self, val): + self._max_bounds[0] = val + max_bounds = property(get_max_bounds, set_max_bounds) +#----------------------------- + +# ^^PLEAC^^_13.5 +#----------------------------- +# There isn't the severe separation between scalar, arrays and hashs +# in Python, so there isn't a direct equivalent to the Perl code. +class Person: + def __init__(self, name=None, age=None, peers=None): + if peers is None: + peers = [] + self.name = name + self.age = age + self.peers = peers + +p = Person("Jason Smythe", 13, ["Wilbur", "Ralph", "Fred"]) + +# or this way. (This is not the prefered style as objects should +# be constructed with all the appropriate data, if possible.) + +p = Person() # allocate an empty Person +p.name = "Jason Smythe" # set its name field +p.age = 13 # set its age field +p.peers.extend( ["Wilbur", "Ralph", "Fred" ] ) # set its peers field + +p.peers = ["Wilbur", "Ralph", "Fred"] + +p.peers[:]= ["Wilbur", "Ralph", "Fred"] + +# fetch various values, including the zeroth friend +print "At age %d, %s's first friend is %s." % \ + (p.age, p.name, p.peers[0]) +#----------------------------- +# This isn't very Pythonic - should create objects with the +# needed data, and not depend on defaults and modifing the object. 
+import sys +def carp(s): + sys.stderr.write("WARNING: " + s + "\n") + +class Person: + def __init__(self, name = "", age = 0): + self.name = name + self.age = age + def __setattr__(self, name, value): + if name == "age": + # This is very unpythonic + if not isinstance(value, type(0)): + carp("age '%s' isn't numeric" % (value,)) + if value > 150: carp("age '%s' is unreasonable" % (value,)) + self.__dict__[name] = value + +class Family: + def __init__(self, head = None, address = "", members = None): + if members is None: members = [] + self.head = head or Person() + self.address = address + self.members = members + +folks = Family() + +dad = folks.head +dad.name = "John" +dad.age = 34 + +print "%s's age is %d" % (folks.head.name, folks.head.age) +#----------------------------- +class Card: + def __init__(self, name=None, color=None, cost=None, + type=None, release=None, text=None): + self.name = name + self.color = color + self.cost = cost + self.type = type + self.release = release + self.type = type +#----------------------------- +# For positional args +class Card: + _names = ("name", "color", "cost", "type", "release", "type") + def __init__(self, *args): + assert len(args) <= len(self._names) + for k, v in zip(self._names, args): + setattr(self, k, None) +#----------------------------- +# For keyword args +class Card: + _names = ("name", "color", "cost", "type", "release", "type") + def __init__(self, **kwargs): + for k in self._names: # Set the defaults + setattr(self, k, None) + for k, v in kwargs.items(): # add in the kwargs + assert k in self._names, "Unexpected kwarg: " + k + setattr(self, k, v) +#----------------------------- +class hostent: + def __init__(self, addr_list = None, length = None, + addrtype = None, aliases = None, name = None): + self.addr_list = addr_list or [] + self.length = length or 0 + self.addrtype = addrtype or "" + self.aliases = aliases or [] + self.name = name or "" +#----------------------------- +## XXX What do I do with 
these? +#define h_type h_addrtype +#define h_addr h_addr_list[0] +#----------------------------- +# make (hostent object)->type() same as (hostent object)->addrtype() +# +# *hostent::type = \&hostent::addrtype; +# +# # make (hostenv object)-> +# addr() +# same as (hostenv object)->addr_list(0) +#sub hostent::addr { shift->addr_list(0,@_) } +#----------------------------- +# No equivalent to Net::hostent (Python uses an unnamed tuple) +#package Extra::hostent; +#use Net::hostent; +#@ISA = qw(hostent); +#sub addr { shift->addr_list(0,@_) } +#1; +#----------------------------- + +# ^^PLEAC^^_13.6 +#----------------------------- +class Class(Parent): + pass +#----------------------------- +## Note: this is unusual in Python code +ob1 = SomeClass() +# later on +ob2 = ob1.__class__() +#----------------------------- +## Note: this is unusual in Python code +ob1 = Widget() +ob2 = ob1.__class__() +#----------------------------- +# XXX I do not know the intent of the original Perl code +# Do not use this style of programming in Python. 
import time
class Person(possible,base,classes):
    """Calls every direct base class constructor, then sets its own fields.

    An optional 'parent' keyword argument is stored as self.PARENT; it
    defaults to None.  All arguments are also passed on unchanged to the
    base class constructors.
    """
    def __init__(self, *args, **kwargs):
        # Call the parents' constructors, if there are any
        for baseclass in self.__class__.__bases__:
            # The None default is what makes the test below meaningful:
            # a base without __init__ is skipped instead of raising
            # AttributeError.
            init = getattr(baseclass, "__init__", None)
            if init is not None:
                init(self, *args, **kwargs)
        # init data fields
        self.PARENT = kwargs.get("parent")
        self.START = time.time()
        self.AGE = 0
#-----------------------------

# ^^PLEAC^^_13.7
#-----------------------------
methname = "flicker"
getattr(obj, methname)(10)  # calls obj->flicker(10);

# call three methods on the object, by name
for m in ("start", "run", "stop"):
    getattr(obj, m)()
#-----------------------------
methods = ("name", "rank", "serno")
his_info = {}
for m in methods:
    his_info[m] = getattr(ob, m)()

# same as this:

his_info = {
    'name': ob.name(),
    'rank': ob.rank(),
    'serno': ob.serno(),
}
#-----------------------------
fnref = ob.method
#-----------------------------
fnref(10, "fred")
#-----------------------------
obj.method(10, "fred")
#-----------------------------
# XXX Not sure if this is the correct translation.
# XXX Is 'can' special?
if isinstance(obj_target, obj.__class__):
    obj.can('method_name')(obj_target, *arguments)
#-----------------------------

# ^^PLEAC^^_13.8
#-----------------------------
isinstance(obj, mimetools.Message)
issubclass(obj.__class__, mimetools.Message)

if hasattr(obj, "method_name"):  # check method validity
    pass
#-----------------------------
## Explicit type checking is needed fewer times than you think.
his_print_method = getattr(obj, "as_string", None)
#-----------------------------
__version__ = (3, 0)
Some_Module.__version__

# Almost never used, and doesn't work for builtin types, which don't
# have a __module__.
+ +his_vers = obj.__module__.__version__ +#----------------------------- +if Some_Module.__version__ < (3, 0): + raise ImportError("Some_Module version %s is too old, expected (3, 0)" % + (Some_Module.__version__,)) +# or more simply +assert Some_Module.__version__ >= (3, 0), "version too old" + +#----------------------------- +__VERSION__ = '1.01' +#----------------------------- + +# ^^PLEAC^^_13.9 +#----------------------------- +# Note: This uses the standard Python idiom of accessing the +# attributes directly rather than going through a method call. +# See earlier in this chapter for examples of how this does +# not break encapsulation. +class Person: + def __init__(self, name = "", age = 0): + self.name = name + self.age = age +#----------------------------- +# Prefered: dude = Person("Jason", 23) +dude = Person() +dude.name = "Jason" +dude.age = 23 +print "%s is age %d." % (dude.name, dude.age) +#----------------------------- +class Employee(Person): + pass +#----------------------------- +# Prefered: empl = Employee("Jason", 23) +emp = Employee() +empl.name = "Jason" +empl.age = 23 +print "%s is age %d." % (empl.name, empl.age) +#----------------------------- + +# ^^PLEAC^^_13.10 +#----------------------------- +# This doesn't need to be done since if 'method' doesn't +# exist in the Class it will be looked for in its BaseClass(es) +class Class(BaseClass): + def method(self, *args, **kwargs): + BaseClass.method(self, *args, **kwargs) + +# This lets you pick the specific method in one of the base classes +class Class(BaseClass1, BaseClass2): + def method(self, *args, **kwargs): + BaseClass2.method(self, *args, **kwargs) + +# This looks for the first method in the base class(es) without +# specifically knowing which base class. This reimplements +# the default action so isn't really needed. 
class Class(BaseClass1, BaseClass2, BaseClass3):
    def method(self, *args, **kwargs):
        """Call 'method' on the first direct base class that provides it.

        Raises NotImplementedError if no direct base class has one.
        """
        # NOTE: self.__class__ is the class of the instance, so if Class
        # is itself subclassed this loop finds Class.method again and
        # recurses.  Another reason not to reimplement method lookup.
        for baseclass in self.__class__.__bases__:
            # Default of None so that a base without 'method' is skipped
            # rather than raising AttributeError.
            f = getattr(baseclass, "method", None)
            if f is not None:
                # f was looked up on the class, so it is not bound to
                # this instance: self must be passed explicitly.
                return f(self, *args, **kwargs)
        raise NotImplementedError("method")

#-----------------------------
self.meth()  # Call wherever first meth is found

Where.meth(self)  # Call in the base class "Where"

# XXX Does Perl only have single inheritance? Or does
# it check all base classes? No directly equivalent way
# to do this in Python, but see above.
#-----------------------------
import time

# The Perl code calls a private '_init' function, but in
# Python there's no need for the complexity of the 'new' mechanism
# so it's best just to put the '_init' code in '__init__'.
class Class:
    def __init__(self, *args):
        # init data fields
        self.START = time.time()
        self.AGE = 0
        self.EXTRA = args  # anything extra
#-----------------------------
obj = Widget(haircolor = "red", freckles = 121)
#-----------------------------
class Class(Base1, Base2, Base3):
    def __init__(self, *args, **kwargs):
        # Run the constructor of every direct base class that has one.
        for base in self.__class__.__bases__:
            f = getattr(base, "__init__", None)
            if f is not None:
                f(self, *args, **kwargs)
#-----------------------------

# ^^PLEAC^^_13.11
#-----------------------------
# NOTE: Python prefers direct attribute lookup rather than
# method calls. Python 2.2 will introduce a 'get_set' which
# *may* be equivalent, but I don't know enough about it. So
# instead I'll describe a class that lets you restrict access
# to only specific attributes.
+ +class Private: + def __init__(self, names): + self.__names = names + self.__data = {} + def __getattr__(self, name): + if name in self.__names: + return self.__data[name] + raise AttributeError(name) + def __setattr__(self, name, value): + if name.startswith("_Private"): + self.__dict__[name] = value + return + if name in self.__names: + self.__data[name] = value + return + raise TypeError("cannot set the attribute %r" % (name,)) + +class Person(Private): + def __init__(self, parent = None): + Private.__init__(self, ["name", "age", "peers", "parent"]) + self.parent = parent + def new_child(self): + return Person(self) +#----------------------------- +dad = Person() +dad.name = "Jason" +dad.age = 23 +kid = dad.new_child() +kid.name = "Rachel" +kid.age = 2 +print "Kid's parent is", kid.parent.name +#=>Kid's parent is Jason + +# ^^PLEAC^^_13.12 +#----------------------------- +## XXX No clue on what this does. For that matter, what's +## "The Data Inheritance Problem"? + +# ^^PLEAC^^_13.13 +#----------------------------- +node.NEXT = node +#----------------------------- +# This is not a faithful copy of the Perl code, but it does +# show how to have the container's __del__ remove cycles in +# its contents. Note that Python 2.0 includes a garbage +# collector that is able to remove these sorts of cycles, but +# it's still best to prevent cycles in your code. 
+class Node: + def __init__(self, value = None): + self.next = self + self.prev = self + self.value = value + +class Ring: + def __init__(self): + self.ring = None + self.count = 0 + + def __str__(self): + # Helpful when debugging, to print the contents of the ring + s = "#%d: " % self.count + x = self.ring + if x is None: + return s + values = [] + while True: + values.append(x.value) + x = x.next + if x is self.ring: + break + return s + " -> ".join(map(str, values)) + " ->" + + def search(self, value): + node = self.ring + while True: + if node.value == value: + return node + node = node.next + if node is self.ring: + break + + def insert_value(self, value): + node = Node(value) + if self.ring is not None: + node.prev, node.next = self.ring.prev, self.ring + self.ring.prev.next = self.ring.prev = node + self.ring = node + self.count += 1 + + def delete_value(self, value): + node = self.search(value) + if node is not None: + self.delete_node(node) + + def delete_node(self, node): + if node is node.next: + node.next = node.prev = None + self.ring = None + else: + node.prev.next, node.next.prev = node.next, node.prev + if node is self.ring: + self.ring = node.next + self.count -= 1 + + def __del__(self): + while self.ring is not None: + self.delete_node(self.ring) + +COUNT = 1000 +for rep in range(20): + r = Ring() + for i in range(COUNT): + r.insert_value(i) +#----------------------------- + +# ^^PLEAC^^_13.14 +#----------------------------- +import UserString +class MyString(UserString.UserString): + def __cmp__(self, other): + return cmp(self.data.upper(), other.upper()) + +class Person: + def __init__(self, name, idnum): + self.name = name + self.idnum = idnum + def __str__(self): + return "%s (%05d)" % (self.name.lower().capitalize(), self.idnum) + +#----------------------------- +class TimeNumber: + def __init__(self, hours, minutes, seconds): + assert minutes < 60 and seconds < 60 + self.hours = hours + self.minutes = minutes + self.seconds = seconds + def 
__str__(self): + return "%d:%02d:%02d" % (self.hours, self.minutes, self.seconds) + def __add__(self, other): + seconds = self.seconds + other.seconds + minutes = self.minutes + other.minutes + hours = self.hours + other.hours + if seconds >= 60: + seconds %= 60 + minutes += 1 + if minutes >= 60: + minutes %= 60 + hours += 1 + return TimeNumber(hours, minutes, seconds) + + def __sub__(self, other): + raise NotImplementedError + + def __mul__(self, other): + raise NotImplementedError + + def __div__(self, other): + raise NotImplementedError + +t1 = TimeNumber(0, 58, 59) +sec = TimeNumber(0, 0, 1) +min = TimeNumber(0, 1, 0) +print t1 + sec + min + min +# 1:01:00 + +#----------------------------- +# For demo purposes only - the StrNum class is superfluous in this +# case as plain strings would give the same result. +class StrNum: + def __init__(self, value): + self.value = value + + def __cmp__(self, other): # both <=> and cmp + # providing <=> gives us <, ==, etc. for free. + # __lt__, __eq__, and __gt__ can also be individually specified + return cmp(self.value, other.value) + + def __str__(self): # "" + return self.value + + def __nonzero__(self, other): # bool + return bool(self.value) + + def __int__(self, other): # 0+ + return int(self.value) + + def __add__(self, other): # + + return StrNum(self.value + other.value) + + def __radd__(self, other): # +, inverted + return StrNum(other.value + self.value) + + def __mul__(self, other): # * + return StrNum(self.value * other) + + def __rmul__(self, other): # *, inverted + return StrNum(self.value * other) + + +def demo(): + # show_strnum - demo operator overloading + x = StrNum("Red") + y = StrNum("Black") + z = x + y + r = z * 3 + print "values are %s, %s, %s, and %s" % (x, y, z, r) + if x < y: + s = "LT" + else: + s = "GE" + print x, "is", s, y + +if __name__ == "__main__": + demo() +# values are Red, Black, RedBlack, and RedBlackRedBlackRedBlack +# Red is GE Black + +#----------------------------- +#!/usr/bin/env 
python +# demo_fixnum - show operator overloading + +# sum of STRFixNum: 40 and STRFixNum: 12 is STRFixNum: 52 +# product of STRFixNum: 40 and STRFixNum: 12 is STRFixNum: 480 +# STRFixNum: 3 has 0 places +# div of STRFixNum: 40 by STRFixNum: 12 is STRFixNum: 3.33 +# square of that is STRFixNum: 11.11 + +# This isn't excatly the same as the original Perl code since +# I couldn't figure out why the PLACES variable was used. +#----------------------------- +import re +_places_re = re.compile(r"\.(\d+)") + +default_places = 0 + +class FixNum: + def __init__(self, value, places = None): + self.value = value + if places is None: + # get from the value + m = _places_re.search(str(value)) + if m: + places = int(m.group(1)) + else: + places = default_places + self.places = places + + def __add__(self, other): + return FixNum(self.value + other.value, + max(self.places, other.places)) + + def __mul__(self, other): + return FixNum(self.value * other.value, + max(self.places, other.places)) + + def __div__(self, other): + # Force to use floating point, since 2/3 in Python is 0 + # Don't use float() since that will convert strings + return FixNum((self.value+0.0) / other.value, + max(self.places, other.places)) + + def __str__(self): + return "STR%s: %.*f" % (self.__class__.__name__, + self.places, self.value) + def __int__(self): + return int(self.value) + + def __float__(self): + return self.value + +def demo(): + x = FixNum(40) + y = FixNum(12, 0) + + print "sum of", x, "and", y, "is", x+y + print "product of", x, "and", y, "is", x*y + + z = x/y + print "%s has %d places" % (z, z.places) + if not z.places: + z.places = 2 + + print "div of", x, "by", y, "is", z + print "square of that is ", z*z + +if __name__ == "__main__": + demo() + + +# ^^PLEAC^^_13.15 +# You can't tie a variable, but you can use properties. 
+import itertools +class ValueRing(object): + def __init__(self, colours): + self.colourcycle = itertools.cycle(colours) + + def next_colour(self): + return self.colourcycle.next() + colour = property(next_colour) +vr = ValueRing(["red", "blue"]) +for i in range(6): + print vr.colour, +print + +# Note that you MUST refer directly to the property +x = vr.colour +print x, x, x +#------------------------------------- +# Ties are generally unnecessary in Python because of its strong OO support - +# The resulting code is MUCH shorter: +class AppendDict(dict): + def __setitem__(self, key, val): + if key in self: + self[key].append(val) + else: + super(AppendDict, self).__setitem__(key, [val]) +tab = AppendDict() +tab["beer"] = "guinness" +tab["food"] = "potatoes" +tab["food"] = "peas" + +for key, val in tab.items(): + print key, "=>", val +#------------------------------------- +class CaselessDict(dict): + def __setitem__(self, key, val): + super(CaselessDict, self).__setitem__(key.lower(), val) + def __getitem__(self, key): + return super(CaselessDict, self).__getitem__(key.lower()) + +tab = CaselessDict() +tab["VILLAIN"] = "big " +tab["herOine"] = "red riding hood" +tab["villain"] = "bad wolf" + +for key, val in tab.items(): + print key, "is", val +#=>villain is bad wolf +#=>heroine is red riding hood +#------------------------------------- +class RevDict(dict): + def __setitem__(self, key, val): + super(RevDict, self).__setitem__(key, val) + super(RevDict, self).__setitem__(val, key) + +tab = RevDict() +tab["red"] = "rojo" +tab["blue"] = "azul" +tab["green"] = "verde" +tab["evil"] = ("No Way!", "Way!") + +for key, val in tab.items(): + print key, "is", val +#=>blue is azul +#=>('No Way!', 'Way!') is evil +#=>rojo is red +#=>evil is ('No Way!', 'Way!') +#=>azul is blue +#=>verde is green +#=>green is verde +#=>red is rojo +#------------------------------------- +import itertools +for elem in itertools.count(): + print "Got", elem +#------------------------------------- 
+# You could use FileDispatcher from section 7.18 +tee = FileDispatcher(sys.stderr, sys.stdout) +#------------------------------------- +# @@PLEAC@@_14.0 + +# See http://www.python.org/doc/topics/database/ for Database Interfaces details. +# currently listed on http://www.python.org/doc/topics/database/modules/ +# +# DB/2, Informix, Interbase, Ingres, JDBC, MySQL, pyodbc, mxODBC, ODBC Interface, +# DCOracle, DCOracle2, PyGresQL, psycopg, PySQLite, sapdbapi, Sybase, ThinkSQL. +# + +# @@PLEAC@@_14.1 +#------------------------------------- +import anydbm +filename = "test.db" +try: + db = anydbm.open(filename) +except anydbm, err: + print "Can't open %s: %s!" % (filename, err) + +db["key"] = "value" # put value into database +if "key" in db: # check whether in database + val = db.pop("key") # retrieve and remove from database +db.close() # close the database +#------------------------------------- +# download the following standalone program +#!/usr/bin/python +# userstats - generates statistics on who logged in. +# call with an argument to display totals + +import sys, os, anydbm, re + +db_file = '/tmp/userstats.db' # where data is kept between runs + +try: + db = anydbm.open(db_file,'c') # open, create if it does not exist +except: + print "Can't open db %s: %s!" 
% (db_file, sys.exc_info()[1]) + sys.exit(1) + +if len(sys.argv) > 1: + if sys.argv[1] == 'ALL': + userlist = db.keys() + else: + userlist = sys.argv[1:] + userlist.sort() + for user in userlist: + if db.has_key(user): + print "%s\t%s" % (user, db[user]) + else: + print "%s\t%s" % (user, 0) +else: + who = os.popen('who').readlines() # run who(1) + if len(who)<1: + print "error running who" # exit + sys.exit(1) + # extract username (first thin on the line) and update + user_re = re.compile("^(\S+)") + for line in who: + fnd = user_re.search(line) + if not fnd: + print "Bad line from who: %s" % line + sys.exit(1) + user = fnd.groups()[0] + if not db.has_key(user): + db[user] = "0" + db[user] = str(int(db[user])+1) # only strings are allowed +db.close() + + + + +# @@PLEAC@@_14.2 +# Emptying a DBM File + +import anydbm + +try: + db = anydbm.open(FILENAME,'w') # open, for writing +except anydbm.error, err: + print "Can't open db %s: %s!" % (filename, err) + raise SystemExit(1) + +db.clear() +db.close() +# ------------------------------- +try: + db = anydbm.open(filename,'n') # open, always create a new empty db +except anydbm.error, err: + print "Can't open db %s: %s!" % (filename, err) + raise SystemExit(1) + +db.close() +# ------------------------------- +import os +try: + os.remove(FILENAME) +except OSError, err: + print "Couldn't remove %s to empty the database: %s!" % (FILENAME, + err) + raise SystemExit + +try: + db = anydbm.open(FILENAME,'n') # open, flways create a new empty db +except anydbm.error, err: + print "Couldn't create %s database: %s!" 
% (FILENAME, err) + raise SystemExit + +# @@PLEAC@@_14.3 +# Converting Between DBM Files + +# download the following standalone program +#!/usr/bin/python +# db2gdbm: converts DB to GDBM + +import sys +import dbm, gdbm + +if len(sys.argv)<3: + print "usage: db2gdbm infile outfile" + sys.exit(1) + +(infile, outfile) = sys.argv[1:] + +# open the files +try: + db_in = dbm.open(infile) +except: + print "Can't open infile %s: %s!" % (infile, sys.exc_info()[1]) + sys.exit(1) +try: + db_out = dbm.open(outfile,"n") +except: + print "Can't open outfile %s: %s!" % (outfile, sys.exc_info()[1]) + sys.exit(1) + +# copy (don't use db_out = db_in because it's slow on big databases) +# is this also so for python ? +for k in db_in.keys(): + db_out[k] = db_in[k] + +# these close happen automatically at program exit +db_out.close() +db_in.close() + + + +# @@PLEAC@@_14.4 + +OUTPUT.update(INPUT1) +OUTPUT.update(INPUT2) + +OUTPUT = anydbm.open("OUT","n") +for INPUT in (INPUT1, INPUT2, INPUT1): + for key, value in INPUT.iteritems(): + if OUTPUT.has_key(key): + # decide which value to use and set OUTPUT[key] if necessary + print "key %s already present: %s, new: %s" % ( + key, OUTPUT[key], value ) + else: + OUTPUT[key] = value + +# @@PLEAC@@_14.5 +# On systems where the Berkeley DB supports it, dbhash takes an +# "l" flag: +import dbhash +dbhash.open("mydb.db", "cl") # 'c': create if doesn't exist + +# @@INCOMPLETE@@ + +# @@PLEAC@@_14.6 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_14.7 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_14.8 +# shelve uses anydbm to access and chooses between DBMs. +# anydbm detect file formats automatically. +import shelve +db = shelve.open("celebrities.db") + +name1 = "Greg Stein" +name2 = "Greg Ward" + +# shelve uses pickle to convert objects into strings and back. +# This is automatic. 
+db[name1] = ["of ViewCVS fame", "gstein@lyra.org"] +db[name2] = ["of Distutils fame", "gward@python.net"] + +greg1 = db[name1] +greg2 = db[name2] + +print "Two Gregs: %x %x" % (id(greg1), id(greg2)) + +if greg1 == greg2: + print "You're having runtime fun with one Greg made two." +else: + print "No two Gregs are ever alike." + +# Changes to mutable entries are not written back by default. +# You can get the copy, change it, and put it back. +entry = db[name1] +entry[0] = "of Subversion fame" +db[name1] = entry + +# Or you can open shelve with writeback option. Then you can +# change mutable entries directly. (New in 2.3) +db = shelve.open("celebrities.db", writeback=True) +db[name2][0] = "of Optik fame" + +# However, writeback option can consume vast amounts of memory +# to do its magic. You can clear cache with sync(). +db.sync() +#----------------------------- + +# @@PLEAC@@_14.9 +# DON'T DO THIS. +import os as _os, shelve as _shelve + +_fname = "persist.db" +if not _os.path.exists(_fname): + var1 = "foo" + var2 = "bar" +_d = _shelve.open("persist.db") +globals().update(_d) + +print "var1 is %s; var2 is %s"%(var1, var2) +var1 = raw_input("New var1: ") +var2 = raw_input("New var2: ") + +for key, val in globals().items(): + if not key.startswith("_"): + _d[key] = val +# @@INCOMPLETE@@ + +# @@PLEAC@@_14.10 +#----------------------------- +import dbmodule + +dbconn = dbmodule.connect(arguments...) + +cursor = dbconn.cursor() +cursor.execute(sql) + +while True: + row = cursor.fetchone() + if row is None: + break + ... + +cursor.close() +dbconn.close() + +#----------------------------- +import MySQLdb +import pwd + +dbconn = MySQLdb.connect(db='dbname', host='mysqlserver.domain.com', + port=3306, user='user', passwd='password') + +cursor = dbconn.cursor() +cursor.execute("CREATE TABLE users (uid INT, login CHAR(8))") + +# Note: some databases use %s for parameters, some use ? 
or other +# formats +sql_fmt = "INSERT INTO users VALUES( %s, %s )" + +for userent in pwd.getpwall(): + # the second argument contains a list of parameters which will + # be quoted before being put in the query + cursor.execute(sql_fmt, (userent.pw_uid, userent.pw_name)) + +cursor.execute("SELECT * FROM users WHERE uid < 50") + +for row in cursor.fetchall(): + # NULL will be displayed as None + print ", ".join(map(str, row)) + +cursor.execute("DROP TABLE users") +cursor.close() +dbconn.close() +#----------------------------- + +# @@PLEAC@@_14.11 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.1 +#----------------------------- +# Parsing program arguments +# -- getopt way (All Python versions) + +#----------------------------- +# Preamble + +import sys +import getopt + +# getopt() explicitly receives arguments for it to process. +# No magic. Explicit is better than implicit. + +# PERL: @ARGV +argv = sys.argv[1:] + +# Note that sys.argv[0] is the script name, and need to be +# stripped. + +#----------------------------- +# Short options + +# PERL: getopt("vDo"); +# Polluting the caller's namespace is evil. Don't do that. + +# PERL: getopt("vDo:", \%opts); +opts, rest = getopt.getopt(argv, "vDo:") + +# If you want switches to take arguments, you must say so. +# Unlike PERL, which silently performs its magic, switches +# specified without trailing colons are considered boolean +# flags by default. + +# PERL: getopt("vDo", \%opts); +opts, rest = getopt.getopt(argv, "v:D:o:") + +# PERL: getopts("vDo:", \%opts); +# getopt/getopts distinction is not present in Python 'getopt' +# module. + +#----------------------------- +# getopt() return values, compared to PERL + +# getopt() returns two values. The first is a list of +# (option, value) pair. (Not a dictionary, i.e. Python hash.) +# The second is the list of arguments left unprocessed. 
+ +# Example +# >>> argv = "-v ARG1 -D ARG2 -o ARG3".split() +# >>> opts, rest = getopt.getopt(argv, "v:D:o:") +# >>> print opts +# [('-v', 'ARG1'), ('-D', 'ARG2'), ('-o', 'ARG3')] + +#----------------------------- +# Long options + +# getopt() handles long options too. Pass a list of option +# names as the third argument. If an option takes an argument, +# append an equal sign. + +opts, rest = getopt.getopt(argv, "", [ + "verbose", "Debug", "output="]) + +#----------------------------- +# Switch clustering + +# getopt() does switch clustering just fine. + +# Example +# >>> argv1 = '-r -f /tmp/testdir'.split() +# >>> argv2 = '-rf /tmp/testdir'.split() +# >>> print getopt.getopt(argv1, 'rf') +# ([('-r', ''), ('-f', '')], ['/tmp/testdir']) +# >>> print getopt.getopt(argv2, 'rf') +# ([('-r', ''), ('-f', '')], ['/tmp/testdir']) + +#----------------------------- +# @@INCOMPLETE@@ + +# TODO: Complete this section using 'getopt'. Show how to +# use the parsed result. + +# http://www.python.org/doc/current/lib/module-getopt.html +# Python library reference has a "typical usage" demo. + +# TODO: Introduce 'optparse', a very powerful command line +# option parsing module. New in 2.3. + + +# @@PLEAC@@_15.2 +##------------------ +import sys + +def is_interactive_python(): + try: + ps = sys.ps1 + except: + return False + return True +##------------------ +import sys +def is_interactive(): + # only False if stdin is redirected like "-t" in perl. 
+ return sys.stdin.isatty() + +# Or take advantage of Python's Higher Order Functions: +is_interactive = sys.stdin.isatty +##------------------ +import posix +def is_interactive_posix(): + tty = open("/dev/tty") + tpgrp = posix.tcgetpgrp(tty.fileno()) + pgrp = posix.getpgrp() + tty.close() + return (tpgrp == pgrp) + +# test with: +# python 15.2.py +# echo "dummy" | python 15.2.py | cat +print "is python shell:", is_interactive_python() +print "is a tty:", is_interactive() +print "has no tty:", is_interactive_posix() + +if is_interactive(): + while True: + try: + ln = raw_input("Prompt:") + except: + break + print "you typed:", ln + + +# @@PLEAC@@_15.3 + +# Python has no Term::Cap module. +# One could use the curses, but this was not ported to windows, +# use console. + +# just run clear +import os +os.system("clear") +# cache output +clear = os.popen("clear").read() +print clear +# or to avoid print's newline +sys.stdout.write(clear) + +# @@PLEAC@@_15.4 +# Determining Terminal or Window Size + +# eiter use ioctl +import struct, fcntl, termios, sys + +s = struct.pack("HHHH", 0, 0, 0, 0) +hchar, wchar = struct.unpack("HHHH", fcntl.ioctl(sys.stdout.fileno(), + termios.TIOCGWINSZ, s))[:2] +# or curses +import curses +(hchar,wchar) = curses.getmaxyx() + +# graph contents of values +import struct, fcntl, termios, sys +width = struct.unpack("HHHH", fcntl.ioctl(sys.stdout.fileno(), + termios.TIOCGWINSZ, + struct.pack("HHHH", 0, 0, 0, 0)))[1] +if width<10: + print "You must have at least 10 characters" + raise SystemExit + +max_value = 0 +for v in values: + max_value = max(max_value,v) + +ratio = (width-10)/max_value # chars per unit +for v in values: + print "%8.1f %s" % (v, "*"*(v*ratio)) + +# @@PLEAC@@_15.5 + +# there seems to be no standard ansi module +# and BLINK does not blink here. 
+RED = '\033[31m' +RESET = '\033[0;0m' +BLINK = '\033[05m' +NOBLINK = '\033[25m' + +print RED+"DANGER, Will Robinson!"+RESET +print "This is just normal text" +print "Will ``"+BLINK+"Do you hurt yet?"+NOBLINK+"'' and back" + +# @@PLEAC@@_15.6 + +# Show ASCII values for keypresses + +# _Getch is from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/134892 +class _Getch: + """Gets a single character from standard input. Doesn't echo to screen.""" + def __init__(self): + try: + self.impl = _GetchWindows() + except ImportError: + self.impl = _GetchUnix() + + def __call__(self): + return self.impl() + + +class _GetchUnix: + def __init__(self): + import tty, sys + + def __call__(self): + import sys, tty, termios + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + try: + tty.setraw(sys.stdin.fileno()) + ch = sys.stdin.read(1) + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + return ch + + +class _GetchWindows: + def __init__(self): + import msvcrt + + def __call__(self): + import msvcrt + return msvcrt.getch() + + +getch = _Getch() + +print "Press keys to see their ASCII values. 
Use Ctrl-C to quit.\n" +try: + while True: + char = ord(getch()) + if char == 3: + break + print " Decimal: %3d Octal: %3o Hex: x%02x" % (char, char, char) +except KeyboardError: + pass +#---------------------------------------- + +# @@PLEAC@@_15.7 +print "\aWake up!\n"; +#---------------------------------------- +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.8 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.9 +# On Windows +import msvcrt +if msvcrt.kbhit(): + c = msvcrt.getch + +# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/134892 +# @@INCOMPLETE@@ + + +# @@PLEAC@@_15.10 +#---------------------------------------- +import getpass +import pwd +import crypt +password = getpass.getpass('Enter your password: ') +username = getpass.getuser() +encrypted = pwd.getpwnam(username).pw_passwd +if not encrypted or encrypted == 'x': + # If using shadow passwords, this will be empty or 'x' + print "Cannot verify password" +elif crypt.crypt(password, encrypted) != encrypted: + print "You are not", username +else: + print "Welcome,", username +#---------------------------------------- + +# @@PLEAC@@_15.11 + +# simply importing readline gives line edit capabilities to raw_ +import readline +readline.add_history("fake line") +line = raw_input() + +# download the following standalone program +#!/usr/bin/python +# vbsh - very bad shell + +import os +import readline + +while True: + try: + cmd = raw_input('$ ') + except EOFError: + break + status = os.system(cmd) + exit_value = status >> 8 + signal_num = status & 127 + dumped_core = status & 128 and "(core dumped)" or "" + print "Program terminated with status %d from signal %d%s\n" % ( + exit_value, signal_num, dumped_core) + + + +readline.add_history("some line!") +readline.remove_history_item(position) +line = readline.get_history_item(index) + +# an interactive python shell would be +import code, readline +code.InteractiveConsole().interact("code.InteractiveConsole") + +# @@PLEAC@@_15.12 +# @@INCOMPLETE@@ +# 
@@INCOMPLETE@@ + +# @@PLEAC@@_15.13 +#---------------------------------------- +# This entry uses pexpect, a pure Python Expect-like module. +# http://pexpect.sourceforge.net/ + +# for more information, check pexpect's documentation and example. + +import pexpect + +#---------------------------------------- +# spawn program +try: + command = pexpect.spawn("program to run") +except pexpect.ExceptionPexpect: + # couldn't spawn program + pass + +#---------------------------------------- +# you can pass any filelike object to setlog +# passing None will stop logging + +# stop logging +command.setlog(None) + +# log to stdout +import sys +command.setlog(sys.stdout) + +# log to specific file +fp = file("pexpect.log", "w") +command.setlog(fp) + +#---------------------------------------- +# expecting simple string +command.expect("ftp>") + +# expecting regular expression +# actually, string is always treated as regular expression + +# so it's the same thing +command.expect("Name.*:") + +# you can do it this way, too +import re +regex = re.compile("Name.*:") +command.expect(regex) + +#---------------------------------------- +# expecting with timeout +try: + command.expect("Password:", 10) +except pexpect.TIMEOUT: + # timed out + pass + +# setting default timeout +command.timeout = 10 + +# since we set default timeout, following does same as above +try: + command.expect("Password:") +except pexpect.TIMEOUT: + # timed out + pass + +#---------------------------------------- +# what? do you *really* want to wait forever? 
+ +#---------------------------------------- +# sending line: normal way +command.sendline("get spam_and_ham") + +# you can also treat it as file +print>>command, "get spam_and_ham" + +#---------------------------------------- +# finalization + +# close connection with child process +# (that is, freeing file descriptor) +command.close() + +# kill child process +import signal +command.kill(signal.SIGKILL) + +#---------------------------------------- +# expecting multiple choices +which = command.expect(["invalid", "success", "error", "boom"]) + +# return value is index of matched choice +# 0: invalid +# 1: success +# 2: error +# 3: boom + +#---------------------------------------- +# avoiding exception handling +choices = ["invalid", "success", "error", "boom"] +choices.append(pexpect.TIMEOUT) +choices.append(pexpect.EOF) + +which = command.expect(choices) + +# if TIMEOUT or EOF occurs, appropriate index is returned +# (instead of raising exception) +# 4: TIMEOUT +# 5: EOF + +# @@PLEAC@@_15.14 +from Tkinter import * + +def print_callback(): + print "print_callback" + +main = Tk() + +menubar = Menu(main) +main.config(menu=menubar) + +file_menu = Menu(menubar) +menubar.add_cascade(label="File", underline=1, menu=file_menu) +file_menu.add_command(label="Print", command=print_callback) + +main.mainloop() + +# using a class +from Tkinter import * + +class Application(Tk): + def print_callback(self): + print "print_callback" + def debug_callback(self): + print "debug:", self.debug.get() + print "debug level:", self.debug_level.get() + + def createWidgets(self): + menubar = Menu(self) + self.config(menu=menubar) + file_menu = Menu(menubar) + menubar.add_cascade(label="File", + underline=1, menu=file_menu) + file_menu.add_command(label="Print", + command=self.print_callback) + file_menu.add_command(label="Quit Immediately", + command=sys.exit) + # + options_menu = Menu(menubar) + menubar.add_cascade(label="Options", + underline=0, menu=options_menu) + 
options_menu.add_checkbutton( + label="Create Debugging File", + variable=self.debug, + command=self.debug_callback, + onvalue=1, offvalue=0) + options_menu.add_separator() + options_menu.add_radiobutton( + label = "Level 1", + variable = self.debug_level, + value = 1 + ) + options_menu.add_radiobutton( + label = "Level 2", + variable = self.debug_level, + value = 2 + ) + options_menu.add_radiobutton( + label = "Level 3", + variable = self.debug_level, + value = 3 + ) + + def __init__(self, master=None): + Tk.__init__(self, master) + # bound variables must be IntVar, StrVar, ... + self.debug = IntVar() + self.debug.set(0) + self.debug_level = IntVar() + self.debug_level.set(1) + self.createWidgets() + +app = Application() +app.mainloop() + +# @@PLEAC@@_15.15 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.16 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.17 +# Start Python scripts without the annoying DOS window on win32 +# Use extension ".pyw" on files - eg: "foo.pyw" instead of "foo.py" +# Or run programs using "pythonw.exe" rather than "python.exe" + +# @@PLEAC@@_15.18 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_15.19 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + + +# @@PLEAC@@_16.1 +import popen2 + +# other popen methods than popen4 can lead to deadlocks +# if there is much data on stdout and stderr + +(err_out, stdin) = popen2.popen4("program args") +lines = err_out.read() # collect output into one multiline string + +(err_out, stdin) = popen2.popen4("program args") +lines = err_out.readlines() # collect output into a list, one line per element + +#----------------------------- + +(err_out, stdin) = popen2.popen4("program args") +output = [] +while True: + line = err_out.readline() + if not line: + break + output.appen(line) +output = ''.join(output) + +# @@PLEAC@@_16.2 +import os +myfile = "foo.txt" +status = os.system("vi %s" % myfile) + +#----------------------------- +import os +os.system("cmd1 args | cmd2 | cmd3 >outfile") +os.system("cmd args 
<infile >outfile 2>errfile") + +status = os.system("%s %s %s" % (program, arg1, arg2)) +if status != 0: + print "%s exited funny: %s" % (program, status) + raise SystemExit + + +# @@PLEAC@@_16.3 +# ----------------------------- +import os +import sys +import glob + +args = glob.glob("*.data") +try: + os.execvp("archive", args) +except OSError, e: + print "Couldn't replace myself with archive: %s" % err + raise SystemExit + +# The error message does not contain the line number like the "die" in +# perl. But if you want to show more information for debugging, you can +# delete the try...except and you get a nice traceback which shows all +# line numbers and filenames. + +# ----------------------------- +os.execvp("archive", ["accounting.data"]) + +# @@PLEAC@@_16.4 +# ------------------------- +# Read from a child process + +import sys +import popen2 +pipe = popen2.Popen4("program arguments") +pid = pipe.pid +for line in pipe.fromchild.readlines(): + sys.stdout.write(line) + +# Popen4 provides stdout and stderr. +# This avoids deadlocks if you get data +# from both streams. +# +# If you don't need the pid, you +# can use popen2.popen4(...) 
+ +# ----------------------------- +# Write to a child process + +import popen2 + +pipe = popen2.Popen4("gzip > foo.gz") +pid = pipe.pid +pipe.tochild.write("Hello zipped world!\n") +pipe.tochild.close() # programm will get EOF on STDIN + +# @@PLEAC@@_16.5 +class OutputFilter(object): + def __init__(self, target, *args, **kwds): + self.target = target + self.setup(*args, **kwds) + self.textbuffer = "" + + def setup(self, *args, **kwds): + pass + + def write(self, data): + if data.endswith("\n"): + data = self.process(self.textbuffer + data) + self.textbuffer = "" + if data is not None: + self.target.write(data) + else: + self.textbuffer += data + + def process(self, data): + return data + +class HeadFilter(OutputFilter): + def setup(self, maxcount): + self.count = 0 + self.maxcount = maxcount + + def process(self, data): + if self.count < self.maxcount: + self.count += 1 + return data + +class NumberFilter(OutputFilter): + def setup(self): + self.count=0 + + def process(self, data): + self.count += 1 + return "%s: %s"%(self.count, data) + +class QuoteFilter(OutputFilter): + def process(self, data): + return "> " + data + +import sys +f = HeadFilter(sys.stdout, 100) +for i in range(130): + print>>f, i + +print + +txt = """Welcome to Linux, version 2.0.33 on a i686 + +"The software required `Windows 95 or better', +so I installed Linux." """ +f1 = NumberFilter(sys.stdout) +f2 = QuoteFilter(f1) +for line in txt.split("\n"): + print>>f2, line +print +f1 = QuoteFilter(sys.stdout) +f2 = NumberFilter(f1) +for line in txt.split("\n"): + print>>f2, line + + +# @@PLEAC@@_16.6 +# This script accepts several filenames +# as argument. If the file is zipped, unzip +# it first. Then read each line if the file +import os +import sys +import popen2 + +for file in sys.argv[1:]: + if file.endswith(".gz") or file.endswith(".Z"): + (stdout, stdin) = popen2.popen2("gzip -dc '%s'" % file) + fd = stdout + else: + fd = open(file) + for line in fd: + # .... 
+ sys.stdout.write(line) + fd.close() +#----------------------------- + +#----------------------------- +# Ask for filename and open it +import sys +print "File, please?" +line = sys.stdin.readline() +file = line.strip() # chomp +open(file) + +# @@PLEAC@@_16.7 +# Execute foo_command and read the output + +import popen2 +(stdout_err, stdin) = popen2.popen4("foo_command") +for line in stdout_err.readlines(): + # .... + +# @@PLEAC@@_16.8 +# Open command in a pipe +# which reads from stdin and writes to stdout + +import popen2 +pipe = popen2.Popen4("wc -l") # Unix command +pipe.tochild.write("line 1\nline 2\nline 3\n") +pipe.tochild.close() +output = pipe.fromchild.read() + +# @@PLEAC@@_16.9 + +# popen3: get stdout and stderr of new process +# Attetion: This can lead to deadlock, +# since the buffer of stderr or stdout might get filled. +# You need to use select if you want to avoid this. + +import popen2 +(child_stdout, child_stdin, child_stderr) = popen2.popen3(...) + +# @@PLEAC@@_16.10 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.11 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.12 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.13 +# +# Print available signals and their value +# See "man signal" "man kill" on unix. + +import signal +for name in dir(signal): + if name.startswith("SIG"): + value = getattr(signal, name) + print "%s=%s" % (name, value) + +# @@PLEAC@@_16.14 +# You can send signals to processes +# with os.kill(pid, signal) + + +# @@PLEAC@@_16.15 +import signal + +def get_sig_quit(signum, frame): + .... + +signal.signal(signal.SIGQUIT, get_sig_quit) # Install handler + +signal.signal(signal.SIGINT, signal.SIG_IGN) # Ignore this signal +signal.signal(signal.SIGSTOP, signal.SIG_DFL) # Restore to default handling + +# @@PLEAC@@_16.16 +# Example of handler: User must Enter Name ctrl-c does not help + +import sys +import signal + +def ding(signum, frame): + print "\aEnter your name!" 
+ return + +signal.signal(signal.SIGINT, ding) +print "Please enter your name:" + +name = "" +while not name: + try: + name = sys.stdin.readline().strip() + except: + pass + +print "Hello: %s" % name + +# @@PLEAC@@_16.17 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.18 +import signal + +# ignore signal INT +signal.signal(signal.SIGINT, signal.SIG_IGN) + +# Install signal handler +def tsktsk(signum, frame): + print "..." + +signal.signal(signal.SIGINT, tsktsk) + +# @@PLEAC@@_16.19 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.20 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_16.21 +import signal + +def handler(signum, frame): + raise "timeout" + +signal.signal(signal.SIGALRM, handler) + +try: + signal.alarm(5) # signal.alarm(3600) + + # long-time operation + while True: + print "foo" + + signal.alarm(0) +except: + signal.alarm(0) + print "timed out" +else: + print "no time out" + +# @@PLEAC@@_16.22 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.0 +# Socket Programming (tcp/ip and udp/ip) + +import socket + +# Convert human readable form to 32 bit value +packed_ip = socket.inet_aton("208.146.240.1") +packed_ip = socket.inet_aton("www.oreilly.com") + +# Convert 32 bit value to ip adress +ip_adress = socket.inet_ntoa(packed_ip) + +# Create socket object +socketobj = socket(family, type) # Example socket.AF_INT, socket.SOCK_STREAM + +# Get socketname +socketobj.getsockname() # Example, get port adress of client + +# @@PLEAC@@_17.1 + +# Example: Connect to a server (tcp) +# Connect to a smtp server at localhost and send an email. +# For real applications you should use smtplib. 
+ +import socket +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.connect(("localhost", 25)) # SMTP +print s.recv(1024) +s.send("mail from: <pleac@localhost>\n") +print s.recv(1024) +s.send("rcpt to: <guettli@localhost>\n") +print s.recv(1024) +s.send("data\n") +print s.recv(1024) +s.send("From: Python Lover\nSubject: Python is better then perl\n\nYES!\n.\n") +print s.recv(1024) +s.close() + +# @@PLEAC@@_17.2 + +# Create a Server, calling handler for every client +# You can test it with "telnet localhost 1029" + +from SocketServer import TCPServer +from SocketServer import BaseRequestHandler + +class MyHandler(BaseRequestHandler): + def handle(self): + print "I got an request" + +server = TCPServer(("127.0.0.1", 1029), MyHandler) +server.serve_forever() + +# @@PLEAC@@_17.3 +# This is the continuation of 17.2 + +import time +from SocketServer import TCPServer +from SocketServer import BaseRequestHandler + +class MyHandler(BaseRequestHandler): + def handle(self): + # self.request is the socket object + print "%s I got an request from ip=%s port=%s" % ( + time.strftime("%Y-%m-%d %H:%M:%S"), + self.client_address[0], + self.client_address[1] + ) + self.request.send("What is your name?\n") + bufsize=1024 + response=self.request.recv(bufsize).strip() # or recv(bufsize, flags) + data_to_send="Welcome %s!\n" % response + self.request.send(data_to_send) # or send(data, flags) + print "%s connection finnished" % self.client_address[0] + +server = TCPServer(("127.0.0.1", 1028), MyHandler) +server.serve_forever() + +# ----------------- +# Using select + +import select +import socket + +in_list = [] +in_list.append(mysocket) +in_list.append(myfile) +# ... + +out_list = [] +out_list.append(...) + +except_list = [] +except_list.append(...) 
+ +(in_, out_, exc_) = select.select(in_list, out_list, except_list, timeout) + +for fd in in_: + print "Can read", fd +for fd in out_: + print "Can write", fd +for fd in exc_: + print "Exception on", fd + +# Missing: setting TCP_NODELAY + +# @@PLEAC@@_17.4 + +import socket +# Set up a UDP socket +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +# send +MSG = 'Hello' +HOSTNAME = '127.0.0.1' +PORTNO = 10000 +s.connect((HOSTNAME, PORTNO)) +if len(MSG) != s.send(MSG): + # where to get error message "$!". + print "cannot send to %s(%d):" % (HOSTNAME,PORTNO) + raise SystemExit(1) +MAXLEN = 1024 +(data,addr) = s.recvfrom(MAXLEN) +s.close() +print '%s(%d) said "%s"' % (addr[0],addr[1], data) + +# download the following standalone program +#!/usr/bin/python +# clockdrift - compare another system's clock with this one + +import socket +import struct +import sys +import time + +if len(sys.argv)>1: + him = sys.argv[1] +else: + him = '127.1' + +SECS_of_70_YEARS = 2208988800 + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.connect((him,socket.getservbyname('time','udp'))) +s.send('') +(ptime, src) = s.recvfrom(4) +host = socket.gethostbyaddr(src[0]) +delta = struct.unpack("!L", ptime)[0] - SECS_of_70_YEARS - time.time() +print "Clock on %s is %d seconds ahead of this one." 
% (host[0], delta) + + + +# @@PLEAC@@_17.5 + +import socket +import sys + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +try: + s.bind(('', server_port)) +except socket.error, err: + print "Couldn't be a udp server on port %d : %s" % ( + server_port, err) + raise SystemExit + +while True: + datagram = s.recv(MAX_TO_READ) + if not datagram: + break + # do something +s.close() + +# or +import SocketServer + +class handler(SocketServer.DatagramRequestHandler): + def handle(self): + # do something (with self.request[0]) + +s = SocketServer.UDPServer(('',10000), handler) +s.serve_forever() + +# download the following standalone program +#!/usr/bin/python +# udpqotd - UDP message server + +import SocketServer + +PORTNO = 5151 + +class handler(SocketServer.DatagramRequestHandler): + def handle(self): + newmsg = self.rfile.readline().rstrip() + print "Client %s said ``%s''" % (self.client_address[0], newmsg) + self.wfile.write(self.server.oldmsg) + self.server.oldmsg = newmsg + +s = SocketServer.UDPServer(('',PORTNO), handler) +print "Awaiting UDP messages on port %d" % PORTNO +s.oldmsg = "This is the starting message." +s.serve_forever() + + +# download the following standalone program +#!/usr/bin/python +# udpmsg - send a message to the udpquotd server + +import socket +import sys + +MAXLEN = 1024 +PORTNO = 5151 +TIMEOUT = 5 + +server_host = sys.argv[1] +msg = " ".join(sys.argv[2:]) + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +sock.settimeout(TIMEOUT) +sock.connect((server_host, PORTNO)) +sock.send(msg) +try: + msg = sock.recv(MAXLEN) + ipaddr, port = sock.getpeername() + hishost = socket.gethostbyaddr(ipaddr) + print "Server %s responded ``%s''" % ( hishost[0], msg) +except: + print "recv from %s failed (timeout or no server running)." 
% ( + server_host ) +sock.close() + + +# @@PLEAC@@_17.6 + +import socket +import os, os.path + +if os.path.exists("/tmp/mysock"): + os.remove("/tmp/mysock") + +server = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) +server.bind("/tmp/mysock") + +client = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) +client.connect("/tmp/mysock") + +# @@PLEAC@@_17.7 + +ipaddr, port = s.getpeername() +hostname, aliaslist, ipaddrlist = socket.gethostbyaddr(ipaddr) +ipaddr = socket.gethostbyname('www.python.org') +# '194.109.137.226' +hostname, aliaslist, ipaddrlist = socket.gethostbyname_ex('www.python.org') +# ('fang.python.org', ['www.python.org'], ['194.109.137.226']) +socket.gethostbyname_ex('www.google.org') +# ('www.l.google.com', ['www.google.org', 'www.google.com'], +# ['64.233.161.147','64.233.161.104', '64.233.161.99']) + +# @@PLEAC@@_17.8 + +import os + +kernel, hostname, release, version, hardware = os.uname() + +import socket + +address = socket.gethostbyname(hostname) +hostname = socket.gethostbyaddr(address) +hostname, aliaslist, ipaddrlist = socket.gethostbyname_ex(hostname) +# e.g. ('lx3.local', ['lx3', 'b70'], ['192.168.0.13', '192.168.0.70']) + +# @@PLEAC@@_17.9 + +socket.shutdown(0) # Further receives are disallowed +socket.shutdown(1) # Further sends are disallowed. +socket.shutdown(2) # Further sends and receives are disallowed. 
+ +# + +server.send("my request\n") # send some data +server.shutdown(1) # send eof; no more writing +answer = server.recv(1000) # but you can still read + +# @@PLEAC@@_17.10 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.11 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.12 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.13 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.14 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.15 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_17.16 +#------------------------------ +# Restart programm on signal SIGHUP +# Script must be executable: chmod a+x foo.py + +#!/usr/bin/env python +import os +import sys +import time +import signal + +def phoenix(signum, frame): + print "Restarting myself: %s %s" % (self, args) + os.execv(self, args) + +self = os.path.abspath(sys.argv[0]) +args = sys.argv[:] +signal.signal(signal.SIGHUP, phoenix) + +while True: + print "work" + time.sleep(1) + +#-------------------- +# Read config file on SIGHUP +import signal + +config_file = "/usr/local/etc/myprog/server_conf.py" + +def read_config(): + execfile(config_file) + +signal.signal(signal.SIGHUP, read_config) + +# @@PLEAC@@_17.17 + +# chroot + +import os + +try: + os.chroot("/var/daemon") +except Exception: + print "Could not chroot" + raise SystemExit(1) + +#----------------------------- +# fork (Unix): Create a new process +# if pid == 0 --> parent process +# else child process + +import os + +pid = os.fork() +if pid: + print "I am the new child %s" % pid + raise SystemExit +else: + print "I am still the parent" + + +# ---------------------------- +# setsid (Unix): Create a new session +import os +id=os.setsid() + +# ---------------------------- +# Work until INT TERM or HUP signal is received +import time +import signal + +time_to_die = 0 + +def sighandler(signum, frame): + print "time to die" + global time_to_die + time_to_die = 1 + +signal.signal(signal.SIGINT, sighandler) +signal.signal(signal.SIGTERM, 
sighandler) +signal.signal(signal.SIGHUP, sighandler) + +while not time_to_die: + print "work" + time.sleep(1) + +# @@PLEAC@@_17.18 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_18.1 + +import socket +try: + host_info = socket.gethostbyname_ex(name) + # (hostname, aliaslist, ipaddrlist) +except socket.gaierror, err: + print "Can't resolve hostname %s: %s" % (name, err[1]) + +# if you only need the first one +import socket +try: + address = socket.gethostbyname(name) +except socket.gaierror, err: + print "Can't resolve hostname %s: %s" % (name, err[1]) + +# if you have an ip address +try: + host_info = socket.gethostbyaddr(address) + # (hostname, aliaslist, ipaddrlist) +except socket.gaierror, err: + print "Can't resolve address %s: %s" % (address, err[1]) + +# checking back +import socket +try: + host_info = socket.gethostbyaddr(address) +except socket.gaierror, err: + print "Can't look up %s: %s" % (address, err[1]) + raise SystemExit(1) +try: + host_info = socket.gethostbyname_ex(name) +except: + print "Can't look up %s: %s" % (name, err[1]) + raise SystemExit(1) + +found = address in host_info[2] + +# use dnspython for more complex jobs. 
+# download the following standalone program +#!/usr/bin/python +# mxhost - find mx exchangers for a host + +import sys + +import dns +import dns.resolver + +answers = dns.resolver.query(sys.argv[1], 'MX') +for rdata in answers: + print rdata.preference, rdata.exchange + + + +# download the following standalone program +#!/usr/bin/python +# hostaddrs - canonize name and show addresses + +import sys +import socket +name = sys.argv[1] +hent = socket.gethostbyname_ex(name) +print "%s aliases: %s => %s" % ( + hent[0], + len(hent[1])==0 and "None" or ",".join(hent[1]), + ",".join(hent[2]) ) + + +# @@PLEAC@@_18.2 +import ftplib +ftp = ftplib.FTP("ftp.host.com") +ftp.login(username, password) +ftp.cwd(directory) + +# get file +outfile = open(filename, "wb") +ftp.retrbinary("RETR %s" % filename, outfile.write) +outfile.close() + +# upload file +upfile = open(upfilename, "rb") +ftp.storbinary("STOR %s" % upfilename, upfile) +upfile.close() + +ftp.quit() + + +# @@PLEAC@@_18.3 +import smtplib +from email.MIMEText import MIMEText + +msg = MIMEText(body) +msg['From'] = from_address +msg['To'] = to_address +msg['Subject'] = subject + +mailer = smtplib.SMTP() +mailer.connect() +mailer.sendmail(from_address, [to_address], msg.as_string()) + +# @@PLEAC@@_18.4 +import nntplib + +# You can except nntplib.NNTPError to process errors +# instead of displaying traceback. 
+ +server = nntplib.NNTP("news.example.com") +response, count, first, last, name = server.group("misc.test") +headers = server.head(first) +bodytext = server.body(first) +article = server.article(first) + +f = file("article.txt") +server.post(f) + +response, grouplist = server.list() +for group in grouplist: + name, last, first, flag = group + if flag == 'y': + pass # I can post to group + +# @@PLEAC@@_18.5 +import poplib + +pop = poplib.POP3("mail.example.com") +pop.user(username) +pop.pass_(password) +count, size = pop.stat() +for i in range(1, count+1): + reponse, message, octets = pop.retr(i) + # message is a list of lines + pop.dele(i) + +# You must quit, otherwise mailbox remains locked. +pop.quit() + +# @@PLEAC@@_18.6 + +import telnetlib + +tn = telnetlib.Telnet(hostname) + +tn.read_until("login: ") +tn.write(user + "\n") +tn.read_until("Password: ") +tn.write(password + "\n") +# read the logon message up to the prompt +d = tn.expect([prompt,], 10) +tn.write("ls\n") +files = d[2].split() +print len(files), "files" +tn.write("exit\n") +print tn.read_all() # blocks till eof + +# @@PLEAC@@_18.7 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_18.8 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_18.9 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_19.0 +# Introduction +# +# There is no standard cgi/web framework in python, +# this is reason for ranting now and then. +# +# See `PyWebOff <http://pyre.third-bit.com/pyweb/index.html>`__ +# which compares CherryPy, Quixote, Twisted, WebWare and Zope +# Karrigell and print stantements. +# +# Then there is Nevow and Standalone ZPT. + +# @@PLEAC@@_19.1 +# Partial implementation of PLEAC Python section 19.1 +# Written by Seo Sanghyeon + +# Standard CGI module is where PERL shines. Python +# module, cgi, is nothing but a form parser. So it is +# not really fair to compare these two. But I hesitate +# to introduce any non-standard module. After all, +# which one should I choose? 
+ +# I would stick to simple print statements. I believe +# the following is close to how these tasks are usually +# done in Python. + +#----------------------------- +#!/usr/bin/env python +# hiweb - using FieldStorage class to get at form data + +import cgi +form = cgi.FieldStorage() + +# get a value from the form +value = form.getvalue("PARAM_NAME") + +# print a standard header +print "Content-Type: text/html" +print + +# print a document +print "<P>You typed: <TT>%s</TT></P>" % ( + cgi.escape(value), + ) + +#----------------------------- +import cgi +form = cgi.FieldStorage() + +who = form.getvalue("Name") +phone = form.getvalue("Number") +picks = form.getvalue("Choices") + +# if you want to assure `picks' to be a list +picks = form.getlist("Choices") + +#----------------------------- +# Not Implemented + +# To implement -EXPIRES => '+3d', I need to study about +import cgi +import datetime + +time_format = "%a, %d %b %Y %H:%M:%S %Z" +print "Expires: %s" % ( + (datetime.datetime.now() + + datetime.timedelta(+3)).strftime(time_format) + ) +print "Date: %s" % (datetime.datetime.now().strftime(time_format)) +print "Content-Type: text/plain; charset=ISO-8859-1" + +#----------------------------- +# NOTES + +# CGI::param() is a multi-purpose function. Here I want to +# note which Python functions correspond to it. + +# PERL version 5.6.1, CGI.pm version 2.80. +# Python version 2.2.3. cgi.py CVS revision 1.68. + +# Assume that `form' is the FieldStorage instance. + +# param() with zero argument returns parameter names as +# a list. It is `form.keys()' in Python, following Python's +# usual mapping interface. + +# param() with one argument returns the value of the named +# parameter. It is `form.getvalue()', but there are some +# twists: + +# 1) A single value is passed. +# No problem. + +# 2) Multiple values are passed. +# PERL: in LIST context, you get a list. in SCALAR context, +# you get the first value from the list. 
+# Python: `form.getvalue()' returns a list if multiple +# values are passed, a raw value if a single value +# is passed. With `form.getlist()', you always +# get a list. (When a single value is passed, you +# get a list with one element.) With `form.getfirst()', +# you always get a value. (When multiple values are +# passed, you get the first one.) + +# 3) Parameter name is given, but no value is passed. +# PERL: returns an empty string, not undef. POD says this +# feature is new in 2.63, and was introduced to avoid +# "undefined value" warnings when running with the +# -w switch. +# Python: tricky. If you want blank values to be retained, +# you should pass a nonzero `keep_blank_values' keyword +# argument. Default is not to retain blanks. In case +# values are not retained, see below. + +# 4) The parameter name is never even mentioned. +# PERL: returns undef. +# Python: returns None, or whatever you passed as the second +# argument, or `default` keyword argument. This is +# consistent with `get()' method of the Python mapping +# interface. + +# param() with more than one argument modifies the already +# set form data. This functionality is not available in Python +# cgi module. + + +# @@PLEAC@@_19.2 +# enable() from 'cgitb' module, by default, redirects traceback +# to the browser. It is defined as 'enable(display=True, logdir=None, +# context=5)'. + +# equivalent to importing CGI::Carp::fatalsToBrowser. +import cgitb +cgitb.enable() + +# to suppress browser output, you should explicitly say so. +import cgitb +cgitb.enable(display=False) + +# equivalent to calling CGI::Carp::carpout with temporary files. +import cgitb +cgitb.enable(logdir="/var/local/cgi-logs/") + +# Python exception, traceback facilities are much richer than PERL's +# die and its friends. You can use your custom exception formatter +# by replacing sys.excepthook. (equivalent to CGI::Carp::set_message.) +# Default formatter is available as traceback.print_exc() in pure +# Python.
In fact, what cgitb.enable() does is replacing excepthook +# to cgitb.handler(), which knows how to format exceptions to HTML. + +# If this is not enough, (usually this is enough!) Python 2.3 comes +# with a new standard module called 'logging', which is complex, but +# very flexible and entirely customizable. + +# @@PLEAC@@_19.3 +# +# download the following standalone program +#!/usr/bin/python +# webwhoami - show web users id +import getpass +print "Content-Type: text/plain\n" +print "Running as %s\n" % getpass.getuser() + + + +# STDOUT/ERR flushing +# +# In contrast to what the perl cookbook says, modpython.org tells +# STDERR is buffered too. + +# @@PLEAC@@_19.4 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_19.5 + +# use mod_python in the Apache web server. + +# Load the module in httpd.conf or apache.conf + +LoadModule python_module libexec/mod_python.so + +<Directory /some/directory/htdocs/test> + AddHandler mod_python .py + PythonHandler mptest + PythonDebug On +</Directory> + +# test.py file in /some/directory/htdocs/test +from mod_python import apache + +def handler(req): + req.write("Hello World!") + return apache.OK + +# @@PLEAC@@_19.6 + +import os +os.system("command %s %s" % (input, " ".join(files))) # UNSAFE + +# python doc lib cgi-security it says +# +# To be on the safe side, if you must pass a string gotten from a form to a shell +# command, you should make sure the string contains only alphanumeric characters, dashes, +# underscores, and periods. +import re +cmd = "command %s %s" % (input, " ".join(files)) +if re.search(r"[^a-zA-Z0-9._\-]", cmd): + print "rejected" + sys.exit(1) +os.system(cmd) +trans = string.maketrans(string.ascii_letters+string.digits+"-_.", + +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_19.7 +#----------------------------- +# This uses nevow's (http://nevow.com) stan; there's no standard +# way to generate HTML, though there are many implementations of +# this basic idea. 
+from nevow import tags as T +print T.ol[T.li['red'], T.li['blue'], T.li['green']] +# <ol><li>red</li><li>blue</li><li>green</li></ol> + +names = 'Larry Moe Curly'.split() +print T.ul[ [T.li(type="disc")[name] for name in names] ] +# <ul><li type="disc">Larry</li><li type="disc">Moe</li> +# <li type="disc">Curly</li></ul> +#----------------------------- +print T.li["alpha"] +# <li>alpha</li> + +print T.li['alpha'], T.li['omega'] +# <li>alpha</li> <li>omega</li> +#----------------------------- +states = { + "Wisconsin": [ "Superior", "Lake Geneva", "Madison" ], + "Colorado": [ "Denver", "Fort Collins", "Boulder" ], + "Texas": [ "Plano", "Austin", "Fort Stockton" ], + "California": [ "Sebastopol", "Santa Rosa", "Berkeley" ], +} + +print "<TABLE> <CAPTION>Cities I Have Known</CAPTION>"; +print T.tr[T.th('State'), T.th('Cities')] +for k in sorted(states.keys()): + print T.tr[ [T.th(k)] + [T.td(city) for city in sorted(states[k])] ] +print "</TABLE>"; +#----------------------------- +# <TABLE> <CAPTION>Cities I Have Known</CAPTION> +# +# <TR><TH>State</TH> <TH>Cities</TH></TR> +# +# <TR><TH>California</TH> <TD>Berkeley</TD> <TD>Santa Rosa</TD> +# +# <TD>Sebastopol</TD> </TR> +# +# <TR><TH>Colorado</TH> <TD>Boulder</TD> <TD>Denver</TD> +# +# <TD>Fort Collins</TD> </TR> +# +# <TR><TH>Texas</TH> <TD>Austin</TD> <TD>Fort Stockton</TD> +# +# <TD>Plano</TD></TR> +# +# <TR><TH>Wisconsin</TH> <TD>Lake Geneva</TD> <TD>Madison</TD> +# +# <TD>Superior</TD></TR> +# +# </TABLE> +#----------------------------- +print T.table[ + [T.caption['Cities I have Known'], + T.tr[T.th['State'], T.th['Cities']] ] + + [T.tr[ [T.th(k)] + [T.td(city) for city in sorted(states[k])]] + for k in sorted(states.keys())]] +#----------------------------- +# salcheck - check for salaries +import MySQLdb +import cgi + +form = cgi.FieldStorage() + +if 'limit' in form: + limit = int(form['limit'].value) +else: + limit = '' + +# There's not a good way to start an HTML/XML construct with stan +# without 
completing it. +print '<html><head><title>Salary Query</title></head><body>' +print T.h1['Search'] +print '<form>' +print T.p['Enter minimum salary', + T.input(type="text", name="limit", value=limit)] +print T.input(type="submit") +print '</form>' + +if limit: + dbconn = MySQLdb.connect(db='somedb', host='server.host.dom', + port=3306, user='username', + passwd='password') + cursor = dbconn.cursor() + cursor.execute(""" + SELECT name, salary FROM employees + WHERE salary > %s""", (limit,)) + + print T.h1["Results"] + print "<TABLE BORDER=1>" + + for row in cursor.fetchall(): + print T.tr[ [T.td(cell) for cell in row] ] + + print "</TABLE>\n"; + cursor.close() + dbconn.close() + +print '</body></html>' +#----------------------------- + +# @@PLEAC@@_19.8 +#----------------------------- +url = "http://python.org/pypi" +print "Location: %s\n" % url +raise SystemExit +#----------------------------- +# oreobounce - set a cookie and redirect the browser +import Cookie +import time + +c = Cookie.SimpleCookie() +c['filling'] = 'vanilla cr?me' +now = time.time() +future = now + 3*(60*60*24*30) # 3 months +expire_date = time.strftime('%a %d %b %Y %H:%M:%S GMT', future) +c['filling']['expires'] = expire_date +c['filling']['domain'] = '.python.org' + +whither = "http://somewhere.python.org/nonesuch.html" + +# Prints the cookie header +print 'Status: 302 Moved Temporarily' +print c +print 'Location:', whither +print + +#----------------------------- +#Status: 302 Moved Temporarily +#Set-Cookie: filling=vanilla%20cr%E4me; domain=.perl.com; +# expires=Tue, 21-Jul-1998 11:58:55 GMT +#Location: http://somewhere.perl.com/nonesuch.html +#----------------------------- +# os_snipe - redirect to a Jargon File entry about current OS +import os, re +dir = 'http://www.wins.uva.nl/%7Emes/jargon' +matches = [ + (r'Mac', 'm/Macintrash.html'), + (r'Win(dows )?NT', 'e/evilandrude.html'), + (r'Win|MSIE|WebTV', 'm/MicroslothWindows.html'), + (r'Linux', 'l/Linux.html'), + (r'HP-UX', 
'h/HP-SUX.html'), + (r'SunOS', 's/ScumOS.html'), + (None, 'a/AppendixB.html'), + ] + +for regex, page in matches: + if not regex: # default + break + if re.search(regex, os.environ['HTTP_USER_AGENT']): + break +print 'Location: %s/%s\n' % (dir, page) +#----------------------------- +# There's no special way to print headers +print 'Status: 204 No response' +print +#----------------------------- +#Status: 204 No response +#----------------------------- + +# @@PLEAC@@_19.9 +# download the following standalone program +#!/usr/bin/python +# dummyhttpd - start a HTTP daemon and print what the client sends + +import SocketServer +# or use BaseHTTPServer, SimpleHTTPServer, CGIHTTPServer + +def adr_str(adr): + return "%s:%d" % adr + +class RequestHandler(SocketServer.BaseRequestHandler): + def handle(self): + print "client access from %s" % adr_str(self.client_address) + print self.request.recv(10000) + self.request.send("Content-Type: text/plain\n" + "Server: dymmyhttpd/1.0.0\n" + "\n...\n") + self.request.close() + + +adr = ('127.0.0.1', 8001) +print "Please contact me at <http://%s>" % adr_str(adr) +server = SocketServer.TCPServer(adr, RequestHandler) +server.serve_forever() +server.server_close() + + +# @@PLEAC@@_19.10 + +import Cookie +cookies = Cookie.SimpleCookie() +# SimpleCookie is more secure, but does not support all characters. 
+cookies["preference-name"] = "whatever you'd like" +print cookies + +# download the following standalone program +#!/usr/bin/python +# ic_cookies - sample CGI script that uses a cookie + +import cgi +import os +import Cookie +import datetime + +cookname = "favorite-ice-cream" # SimpleCookie does not support blanks +fieldname = "flavor" + +cookies = Cookie.SimpleCookie(os.environ.get("HTTP_COOKIE","")) +if cookies.has_key(cookname): + favorite = cookies[cookname].value +else: + favorite = "mint" + +form = cgi.FieldStorage() +if not form.has_key(fieldname): + print "Content-Type: text/html" + print "\n" + print "<html><body>" + print "<h1>Hello Ice Cream</h1>" + print "<form>" + print 'Please select a flavor: <input type="text" name="%s" value="%s" />' % ( + fieldname, favorite ) + print "</form>" + print "<hr />" + print "</body></html>" +else: + favorite = form[fieldname].value + cookies[cookname] = favorite + expire = datetime.datetime.now() + datetime.timedelta(730) + cookies[cookname]["expires"] = expire.strftime("%a, %d %b %Y %H:00:00 GMT") + cookies[cookname]["path"] = "/" + print "Content-Type: text/html" + print cookies + print "\n" + print "<html><body>" + print "<h1>Hello Ice Cream</h1>" + print "<p>You chose as your favorite flavor \"%s\"</p>" % favorite + print "</body></html>" + + +# @@PLEAC@@_19.11 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_19.12 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_19.13 +#----------------------------- +# first open and exclusively lock the file +import os, cgi, fcntl, cPickle +fh = open('/tmp/formlog', 'ab') +fcntl.flock(fh.fileno(), fcntl.LOCK_EX) + +form = cgi.FieldStorage() +# This doesn't produce a readable file; we copy the environment so +# that we save a plain dictionary (os.environ is a dictionary-like +# object). 
+cPickle.dump((form, os.environ.copy()) fh) +fh.close() +#----------------------------- +import cgi, smtplib, sys + +form = cgi.FieldStorage() +email = """\ +From: %S +To: hisname@hishost.com +Subject: mailed form submission + +""" % sys.argv[0] + +for key in form: + values = form[key] + if not isinstance(values, list): + value = [values.value] + else: + value = [v.value for v in values] + for item in values: + email += '\n%s: %s' % (key, value) + +server = smtplib.SMTP('localhost') +server.sendmail(sys.argv[0], ['hisname@hishost.com'], email) +server.quit() +#----------------------------- +# @@INCOMPLETE@@ I don't get the point of these: +# param("_timestamp", scalar localtime); +# param("_environs", %ENV); +#----------------------------- +import fcntl, cPickle +fh = open('/tmp/formlog', 'rb') +fcntl.flock(fh.fileno(), fcntl.LOCK_SH) + +count = 0 +while True: + try: + form, environ = cPickle.load(fh) + except EOFError: + break + if environ.get('REMOTE_HOST').endswith('perl.com'): + continue + if 'items requested' in form: + count += int(form['items requested'].value) +print 'Total orders:', count +#----------------------------- + +# @@PLEAC@@_19.14 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_20.1 +#----------------------------- +import urllib +content = urllib.urlopen(url).read() + +try: + import urllib + content = urllib.urlopen(url).read() +except IOError: + print "could not get %s" % url + +#----------------------------- +# download the following standalone program +#!/usr/bin/python +# titlebytes - find the title and size of documents +# +# differences to perl +# +# * no URI::Heuristics +# * perl LWP supports fetching files from local system +# * fetching a title from ftp or file doesnt work in perl either. 
+ +import sys, urllib2, HTMLParser +if len(sys.argv)<=1: + print "usage: %s url" % sys.argv[0] + sys.exit(1) +raw_url = sys.argv[1] + +# python has no equivalent to pearls URI::Heuristics, which +# would do some guessing like : +# +# perl -> http://www.perl.com +# www.oreilly.com -> http://www.oreilly.com +# ftp.funet.fi -> ftp://ftp.funet.fi +# /etc/passwd -> file:/etc/passwd + +# simple but pedantic html parser: tpj.com breaks it. +class html(HTMLParser.HTMLParser): + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self._data = {} + self._open_tags = [] + def handle_starttag(self, tag, attrs): + self._open_tags.append(tag) + def handle_endtag(self, tag): + if len(self._open_tags)>0: + self._open_tags.pop() + def handle_data(self, data): + if len(self._open_tags)>0: + self._data[self._open_tags[-1]] = data + def __getattr__(self,attr): + if not self._data.has_key(attr): + return "" + return self._data[attr] + +url = raw_url +print "%s =>\n\t" % url, +# TODO fake user agent "Schmozilla/v9.17 Platinum" +# TODO referer "http://wizard.yellowbrick.oz" +# as we only do http httplib would do also +try: + response = urllib2.urlopen(url) +except: + print " %s" % sys.exc_info()[1].reason[1] + sys.exit(1) +# title is not in response +data = response.read() +parser = html() +parser.feed(data) +parser.close() # force processing all data +count = len(data.split("\n")) +bytes = len(data) +print "%s (%d lines, %d bytes)" % (parser.title, + count, + bytes) + +# omly bytes is in response.info() + + +# @@PLEAC@@_20.2 + +# GET method +import httplib +conn = httplib.HTTPConnection('www.perl.com') +conn.request('GET','/cgi-bin/cpan_mod?module=DB_File&readme=1') +r1 = conn.getresponse() +content = r1.read() + +# POST method +import urllib +params = urllib.urlencode({'module': 'DB_File', 'readme': 1}) +content = urllib.urlopen('http://www.perl.com', params).read() + +# fields must be properly escaped +# script.cgi?field1?arg=%22this%20isn%27t%20%3CEASY%3E%22 + +# proxies can 
be taken from environment, or specified +# as the optional thrid parameter to urlopen. + +# @@PLEAC@@_20.3 +# download the following standalone program +#!/usr/bin/python +# xurl - extract unique, sorted list of links from URL + +from HTMLParser import HTMLParser +import urllib +from sets import Set as set # not needed in 2.4 +class myParser(HTMLParser): + def __init__(self, url): + self.baseUrl = url[:url.rfind('/')] + HTMLParser.__init__(self) + def reset(self): + self.urls = set() + HTMLParser.reset(self) + def handle_starttag(self, tag, attrs): + if tag == 'a': + if attrs[0][0] == 'href': + if attrs[0][1].find(':') == -1: + # we need to add the base URL. + self.urls.add(self.baseUrl + '/' + attrs[0][1]) + else: + self.urls.add(attrs[0][1]) +url = 'http://www.perl.com/CPAN' +p = myParser(url) +s = urllib.urlopen(url) +data = s.read() +p.feed(data) +urllist = p.urls._data.keys() +urllist.sort() +print '\n'.join(urllist) + + + +# @@PLEAC@@_20.4 +# Converting ASCII to HTML + +# download the following standalone program +#!/usr/bin/python +# text2html - trivial html encoding of normal text + +import sys +import re + +# precompile regular expressions +re_quoted = re.compile(r"(?m)^(>.*?)$") +re_url = re.compile(r"<URL:(.*)>") +re_http = re.compile(r"(http:\S+)") +re_strong = re.compile(r"\*(\S+)\*") +re_em = re.compile(r"\b_(\S+)_\b") + +# split paragraphs +for para in open(sys.argv[1]).read().split("\n\n"): + # TODO encode entities: dont encode "<>" but do "&" + if para.startswith(" "): + print "<pre>\n%s\n</pre>" % para + else: + para = re_quoted.sub(r"\1<br />", para) # quoted text + para = re_url.sub(r'<a href="\1">\1</a>', para) # embedded URL + para = re_http.sub(r'<a href="\1">\1</a>', para) # guessed URL + para = re_strong.sub(r"<strong>\1</strong>",para) # this is *bold* here + para = re_em.sub(r"<em>\1</em>",para) # this is _italic_ here + print "<p>\n%s\n</p>" % para # add paragraph tags + + + +#----------------------------- +import sys, re +import 
htmlentitydefs + +def encode_entities(s): + for k,v in htmlentitydefs.codepoint2name.items(): + if k<256: # no unicodes + s = s.replace(chr(k),"&%s;"%v) + return s + +print "<table>" +text = sys.stdin.read() +text = encode_entities(text) +text = re.sub(r"(\n[ \t]+)"," . ",text) # continuation lines +text = re.sub(r"(?m)^(\S+?:)\s*(.*?)$", + r'<tr><th align="left">\1</th><td>\2</td></tr>', + text); +print text +print "</table>" + +# @@PLEAC@@_20.5 +# Converting HTML to ASCII + +#----------------------------- +import os +ascii = os.popen("lynx -dump " + filename).read() + +#----------------------------- +import formatter +import htmllib + +w = formatter.DumbWriter() +f = formatter.AbstractFormatter(w) +p = htmllib.HTMLParser(f) +p.feed(html) +p.close() + +# Above is a bare minimum to use writer/formatter/parser +# framework of Python. + +# Search Python Cookbook for more details, like writing +# your own writers or formatters. + +# Recipe #52297 has TtyFormatter, formatting underline +# and bold in Terminal. Recipe #135005 has a writer +# accumulating text instead of printing. 
+ +# @@PLEAC@@_20.6 + +import re + +plain_text = re.sub(r"<[^>]*>","",html_text) #WRONG + +# using HTMLParser +import sys, HTMLParser + +class html(HTMLParser.HTMLParser): + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self._plaintext = "" + self._ignore = False + def handle_starttag(self, tag, attrs): + if tag == "script": + self._ignore = True + def handle_endtag(self, tag): + if tag == "script": + self._ignore = False + def handle_data(self, data): + if len(data)>0 and not self._ignore: + self._plaintext += data + def get_plaintext(self): + return self._plaintext + def error(self,msg): + # ignore all errors + pass + +html_text = open(sys.argv[1]).read() + +parser = html() +parser.feed(html_text) +parser.close() # force processing all data +print parser.get_plaintext() + +title_s = re.search(r"(?i)<title>\s*(.*?)\s*</title>", text) +title = title_s and title_s.groups()[0] or "NO TITLE" + +# download the following standalone program +#!/usr/bin/python +# htitlebytes - get html title from URL +# + +import sys, urllib2, HTMLParser +if len(sys.argv)<=1: + print "usage: %s url ..." % sys.argv[0] + sys.exit(1) + +# simple but pedantic html parser: tpj.com breaks it. 
+class html(HTMLParser.HTMLParser): + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self._data = {} + self._open_tags = [] + def handle_starttag(self, tag, attrs): + self._open_tags.append(tag) + def handle_endtag(self, tag): + if len(self._open_tags)>0: + self._open_tags.pop() + def handle_data(self, data): + if len(self._open_tags)>0: + self._data[self._open_tags[-1]] = data + def __getattr__(self,attr): + if not self._data.has_key(attr): + return "" + return self._data[attr] + def error(self,msg): + # ignore all errors + pass + +for url in sys.argv[1:]: + print "%s: " % url, + # TODO fake user agent "Schmozilla/v9.17 Platinum" + # TODO referer "http://wizard.yellowbrick.oz" + # as we only do http httplib would do also + try: + response = urllib2.urlopen(url) + except: + print " %s" % sys.exc_info()[1] + sys.exit(1) + # title is not in response + parser = html() + parser.feed(response.read()) + parser.close() # force processing all data + print parser.title + + + +# @@PLEAC@@_20.7 +# download the following standalone program +#!/usr/bin/python +# churl - check urls + +import sys + +# head request +import urllib +def valid(url): + try: + conn = urllib.urlopen(url) + return 1 + except: + return 0 + +# parser class as in xurl +from HTMLParser import HTMLParser +from sets import Set as set # not needed in 2.4 +class myParser(HTMLParser): + def __init__(self, url): + self.baseUrl = url[:url.rfind('/')] + HTMLParser.__init__(self) + def reset(self): + self.urls = set() + HTMLParser.reset(self) + def handle_starttag(self, tag, attrs): + if tag == 'a': + if attrs[0][0] == 'href': + if attrs[0][1].find(':') == -1: + # we need to add the base URL. 
+ self.urls.add(self.baseUrl + '/' + attrs[0][1]) + else: + self.urls.add(attrs[0][1]) + +if len(sys.argv)<=1: + print "usage: %s <start_url>" % (sys.argv[0]) + sys.exit(1) + +base_url = sys.argv[1] +print base_url+":" +p = myParser(base_url) +s = urllib.urlopen(base_url) +data = s.read() +p.feed(data) +for link in p.urls._data.keys(): + state = "UNKNOWN URL" + if link.startswith("http:"): + state = "BAD" + if valid(link): + state = "OK" + print " %s: %s" % (link, state) + + + +# @@PLEAC@@_20.8 +# download the following standalone program +#!/usr/bin/python +# surl - sort URLs by their last modification date + +import urllib +import time +import sys + +Date = {} +while 1: + # we only read from stdin not from argv. + ln = sys.stdin.readline() + if not ln: + break + ln = ln.strip() + try: + u = urllib.urlopen(ln) + date = time.mktime(u.info().getdate("date")) + if not Date.has_key(date): + Date[date] = [] + Date[date].append(ln) + except: + sys.stderr.write("%s: %s!\n" % (ln, sys.exc_info()[1])) + +dates = Date.keys() +dates.sort() # python 2.4 would have sorted +for d in dates: + print "%s %s" % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(d)), + ", ".join(Date[d])) + + +# @@PLEAC@@_20.9 +import re + +def template(filename, fillings): + text = open(filename).read() + def repl(matchobj): + if fillings.has_key(matchobj.group(1)): + return str(fillings[matchobj.group(1)]) + return "" + # replace quoted words with value from fillings dictionary + text = re.sub("%%(.+?)%%", repl, text) + return text + +fields = { "username":"peter", "count":"23", "total": "1234"} +print template("/home/httpd/templates/simple.template", fields) + +# download the following standalone program +#!/usr/bin/python +# userrep1 - report duration of user logins using SQL database + +import MySQLdb +import cgi +import re +import sys + +def template(filename, fillings): + text = open(filename).read() + def repl(matchobj): + if fillings.has_key(matchobj.group(1)): + return 
str(fillings[matchobj.group(1)]) + return "" + # replace quoted words with value from fillings dictionary + text = re.sub("%%(.+?)%%", repl, text) + return text + +fields = cgi.FieldStorage() +if not fields.has_key("user"): + print "Content-Type: text/plain\n" + print "No username" + sys.exit(1) + +def get_userdata(username): + db = MySQLdb.connect(passwd="",db="connections", user="bert") + db.query("select count(duration) as count," + +" sum(duration) as total from logins" + +" where username='%s'" % username) + res = db.store_result().fetch_row(maxrows=1,how=1) + res[0]["username"] = username + db.close() + return res[0] + +print "Content-Type: text/html\n" + +print template("report.tpl", get_userdata(fields["user"].value)) + +# @@INCOMPLETE@@ + +# @@PLEAC@@_20.10 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_20.11 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_20.12 + +# sample data, use ``LOGFILE = open(sys.argv[1])`` in real life +LOGFILE = [ + '127.0.0.1 - - [04/Sep/2005:20:50:31 +0200] "GET /bus HTTP/1.1" 301 303\n', + '127.0.0.1 - - [04/Sep/2005:20:50:31 +0200] "GET /bus HTTP/1.1" 301 303 "-" "Opera/8.02 (X11; Linux i686; U; en)"\n', + '192.168.0.1 - - [04/Sep/2005:20:50:36 +0200] "GET /bus/libjs/layersmenu-library.js HTTP/1.1" 200 6228\n', + '192.168.0.1 - - [04/Sep/2005:20:50:36 +0200] "GET /bus/libjs/layersmenu-library.js HTTP/1.1" 200 6228 "http://localhost/bus/" "Opera/8.02 (X11; Linux i686; U; en)"\n', + ] + +import re + +# similar too perl version. +web_server_log_re = re.compile(r'^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] "(\S+) (.*?) (\S+)" (\S+) (\S+)$') + +# with group naming. 
+split_re = re.compile(r'''(?x) # allow nicer formatting (but requires escaping blanks) + ^(?P<client>\S+)\s + (?P<identuser>\S+)\s + (?P<authuser>\S+)\s + \[ + (?P<date>[^:]+): + (?P<time>[\d:]+)\s + (?P<tz>[^\]]+) + \]\s + " + (?P<method>\S+)\s + (?P<url>.*?)\s + (?P<protocol>\S+) + "\s + (?P<status>\S+)\s + (?P<bytes>\S+) + (?: + \s + " + (?P<referrer>[^"]+) + "\s + " + (?P<agent>[^"]+) + " + )?''') +for line in LOGFILE: + f = split_re.match(line) + if f: + print "agent = %s" % f.groupdict()['agent'] + +# @@PLEAC@@_20.13 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + +# @@PLEAC@@_20.14 +# @@INCOMPLETE@@ +# @@INCOMPLETE@@ + |