From f44947c8a049c46e0a9d0aba668dc089d468db1c Mon Sep 17 00:00:00 2001
From: Worros
Date: Tue, 6 Jul 2010 16:13:25 +0800
Subject: [PATCH] Add a new sample file for parsing website results.

Decided to knock up a quick proof of concept for fetching and parsing
tournament results from a website.

Specifically uses:
- Python's Beautiful Soup library
- P5s results pages
- replace playername with the desired p5s username (eg. taypaur)

Of note; no tournament ids are displayed on any of the sites, so for this to
work we may need to add a field like 'p5sid'
---
Review note: the file contents below were revised by hand, so the commit id
in the first line and the blob id on the "index" line are stale.

 pyfpdb/P5sResultsParser.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 pyfpdb/P5sResultsParser.py

diff --git a/pyfpdb/P5sResultsParser.py b/pyfpdb/P5sResultsParser.py
new file mode 100644
index 00000000..9d08daa2
--- /dev/null
+++ b/pyfpdb/P5sResultsParser.py
@@ -0,0 +1,54 @@
+"""Proof of concept: fetch and print PocketFives tournament results.
+
+Downloads http://www.pocketfives.com/poker-scores/<playername>/, collects the
+cell text of every table row on the page and prints one summary line per
+usable row.  Set ``playername`` below to the desired p5s username first.
+"""
+
+import pprint
+import re
+import sys
+import urllib2
+
+from BeautifulSoup import BeautifulSoup
+
+
+playername = ''
+
+if playername == '':
+    print "You need to manually enter the playername"
+    # Nothing can be fetched without a username, so report failure.  sys.exit
+    # is used because the bare exit() helper is only injected by the site
+    # module and is not meant for use in programs.
+    sys.exit(1)
+
+page = urllib2.urlopen("http://www.pocketfives.com/poker-scores/%s/" % playername)
+try:
+    soup = BeautifulSoup(page)
+finally:
+    # Release the HTTP response once the document has been parsed.
+    page.close()
+
+results = []
+
+for table in soup.findAll('table'):
+    for row in table.findAll('tr'):
+        cells = [col.string for col in row.findAll('td')]
+        # The output loop below reads cells 2..7, so only keep rows that have
+        # all eight cells; shorter rows would raise IndexError when printed.
+        # Rows whose third cell has no `.string` (None) are skipped as well.
+        if len(cells) > 7 and cells[2] is not None:
+            results.append(cells)
+
+# Column headings of the results table; not referenced by the code below.
+# NOTE(review): the output loop labels cells 2..7 as Site..Won, which suggests
+# each row carries one extra leading cell before TOURNAMENT - confirm against
+# the live page.
+cols = ['TOURNAMENT', 'SITE', 'DATE', 'PRIZEPOOL', 'BUY-IN', 'PLACE', 'WON']
+
+# Not used by the output loop; available for dumping `results` when debugging.
+pp = pprint.PrettyPrinter(indent=4)
+
+for result in results:
+    print "Site: %s Date: %s\tPrizepool: %s\tBuyin: %s\tPosition: %s\tWon: %s" %(result[2], result[3], result[4], result[5], result[6], result[7])
+