fpdb/pyfpdb/HandHistoryConverter.py

348 lines
14 KiB
Python
Raw Normal View History

#!/usr/bin/python
#Copyright 2008 Carl Gherardi
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package.
import Hand
import re
import sys
import traceback
2009-03-04 17:46:01 +01:00
import logging
from optparse import OptionParser
import os
import os.path
import xml.dom.minidom
import codecs
from decimal import Decimal
import operator
from xml.dom.minidom import Node
import time
import datetime
2009-07-15 00:34:47 +02:00
#import gettext
#gettext.install('fpdb')
2009-03-12 13:24:23 +01:00
class HandHistoryConverter():
READ_CHUNK_SIZE = 10000 # bytes to read at a time from file (in tail mode)
def __init__(self, in_path = '-', out_path = '-', sitename = None, follow=False):
2009-07-15 00:34:47 +02:00
logging.info("HandHistory init")
# default filetype and codepage. Subclasses should set these properly.
2008-12-06 15:15:41 +01:00
self.filetype = "text"
self.codepage = "utf8"
2009-07-15 00:34:47 +02:00
self.in_path = in_path
self.out_path = out_path
2009-07-15 00:34:47 +02:00
if in_path == '-':
self.in_fh = sys.stdin
if out_path == '-':
self.out_fh = sys.stdout
else:
2009-07-15 00:34:47 +02:00
# TODO: out_path should be sanity checked.
self.out_fh = open(self.out_path, 'w')
self.sitename = sitename
self.follow = follow
self.compiledPlayers = set()
self.maxseats = 10
2008-12-06 15:15:41 +01:00
def __str__(self):
2009-07-15 00:34:47 +02:00
return """
HandHistoryConverter: '%(sitename)s'
filetype: '%(filetype)s'
in_path: '%(in_path)s'
out_path: '%(out_path)s'
""" % { 'sitename':self.sitename, 'filetype':self.filetype, 'in_path':self.in_path, 'out_path':self.out_path }
2008-12-06 15:15:41 +01:00
2009-03-12 13:24:23 +01:00
def start(self):
"""process a hand at a time from the input specified by in_path.
If in follow mode, wait for more data to turn up.
2009-07-15 00:34:47 +02:00
Otherwise, finish at eof.
"""
starttime = time.time()
if not self.sanityCheck():
print "Cowardly refusing to continue after failed sanity check"
return
if self.follow:
numHands = 0
for handText in self.tailHands():
numHands+=1
self.processHand(handText)
else:
handsList = self.allHandsAsList()
logging.info("Parsing %d hands" % len(handsList))
for handText in handsList:
self.processHand(handText)
numHands= len(handsList)
endtime = time.time()
print "read %d hands in %.3f seconds" % (numHands, endtime - starttime)
if self.out_fh != sys.stdout:
self.out_fh.close()
def tailHands(self):
"""Generator of handTexts from a tailed file:
Tail the in_path file and yield handTexts separated by re_SplitHands.
This requires a regex that greedily groups and matches the 'splitter' between hands,
which it expects to find at self.re_TailSplitHands -- see for e.g. Everleaf.py.
"""
2009-03-12 13:24:23 +01:00
if self.in_path == '-': raise StopIteration
interval = 1.0 # seconds to sleep between reads for new data
2009-03-12 13:24:23 +01:00
fd = codecs.open(self.in_path,'r', self.codepage)
data = ''
while 1:
where = fd.tell()
newdata = fd.read(self.READ_CHUNK_SIZE)
if not newdata:
fd_results = os.fstat(fd.fileno())
try:
2009-03-12 13:24:23 +01:00
st_results = os.stat(self.in_path)
except OSError:
st_results = fd_results
if st_results[1] == fd_results[1]:
time.sleep(interval)
fd.seek(where)
else:
logging.debug("%s changed inode numbers from %d to %d" % (self.in_path, fd_results[1], st_results[1]))
2009-03-12 13:24:23 +01:00
fd = codecs.open(self.in_path, 'r', self.codepage)
fd.seek(where)
else:
# yield hands
data = data + newdata
result = self.re_TailSplitHands.split(data)
result = iter(result)
data = ''
# --x data (- is bit of splitter, x is paragraph) yield,...,keep
# [,--,x] result of re.split (with group around splitter)
# ,x our output: yield nothing, keep x
#
# --x--x [,--,x,--,x] x,x
# -x--x [-x,--,x] x,x
# x- [x-] ,x-
# x-- [x,--,] x,--
# x--x [x,--,x] x,x
# x--x-- [x,--,x,--,] x,x,--
# The length is always odd.
# 'odd' indices are always splitters.
# 'even' indices are always paragraphs or ''
# We want to discard all the ''
# We want to discard splitters unless the final item is '' (because the splitter could grow with new data)
# We want to yield all paragraphs followed by a splitter, i.e. all even indices except the last.
for para in result:
try:
result.next()
splitter = True
except StopIteration:
splitter = False
if splitter: # para is followed by a splitter
if para: yield para # para not ''
else:
data = para # keep final partial paragraph
def allHandsAsList(self):
"""Return a list of handtexts in the file at self.in_path"""
#TODO : any need for this to be generator? e.g. stars support can email one huge file of all hands in a year. Better to read bit by bit than all at once.
self.readFile()
self.obs = self.obs.strip()
self.obs = self.obs.replace('\r\n', '\n')
if self.obs == "" or self.obs == None:
logging.info("Read no hands.")
return
2009-04-02 22:16:36 +02:00
return re.split(self.re_SplitHands, self.obs)
def processHand(self, handText):
gametype = self.determineGameType(handText)
2009-03-06 19:10:04 +01:00
logging.debug("gametype %s" % gametype)
if gametype is None:
l = None
gametype = "unmatched"
# TODO: not ideal, just trying to not error.
# TODO: Need to count failed hands.
else:
# See if gametype is supported.
type = gametype['type']
base = gametype['base']
limit = gametype['limitType']
l = [type] + [base] + [limit]
if l in self.readSupportedGames():
hand = None
if gametype['base'] == 'hold':
logging.debug("hand = Hand.HoldemOmahaHand(self, self.sitename, gametype, handtext)")
hand = Hand.HoldemOmahaHand(self, self.sitename, gametype, handText)
elif gametype['base'] == 'stud':
hand = Hand.StudHand(self, self.sitename, gametype, handText)
elif gametype['base'] == 'draw':
hand = Hand.DrawHand(self, self.sitename, gametype, handText)
else:
logging.info("Unsupported game type: %s" % gametype)
if hand:
hand.writeHand(self.out_fh)
else:
2009-03-06 19:10:04 +01:00
logging.info("Unsupported game type: %s" % gametype)
# TODO: pity we don't know the HID at this stage. Log the entire hand?
# From the log we can deduce that it is the hand after the one before :)
2008-12-06 15:15:41 +01:00
# These functions are parse actions that may be overridden by the inheriting class
# This function should return a list of lists looking like:
# return [["ring", "hold", "nl"], ["tour", "hold", "nl"]]
# Showing all supported games limits and types
2008-12-06 15:15:41 +01:00
def readSupportedGames(self): abstract
# should return a list
# type base limit
# [ ring, hold, nl , sb, bb ]
# Valid types specified in docs/tabledesign.html in Gametypes
def determineGameType(self, handText): abstract
"""return dict with keys/values:
'type' in ('ring', 'tour')
'limitType' in ('nl', 'cn', 'pl', 'cp', 'fl')
'base' in ('hold', 'stud', 'draw')
'category' in ('holdem', 'omahahi', omahahilo', 'razz', 'studhi', 'studhilo', 'fivedraw', '27_1draw', '27_3draw', 'badugi')
'hilo' in ('h','l','s')
'smallBlind' int?
'bigBlind' int?
'smallBet'
'bigBet'
'currency' in ('USD', 'EUR', 'T$', <countrycode>)
or None if we fail to get the info """
#TODO: which parts are optional/required?
2008-12-06 15:15:41 +01:00
# Read any of:
2009-02-27 19:42:53 +01:00
# HID HandID
# TABLE Table name
# SB small blind
# BB big blind
# GAMETYPE gametype
# YEAR MON DAY HR MIN SEC datetime
# BUTTON button seat number
2008-12-06 15:15:41 +01:00
def readHandInfo(self, hand): abstract
# Needs to return a list of lists in the format
# [['seat#', 'player1name', 'stacksize'] ['seat#', 'player2name', 'stacksize'] [...]]
def readPlayerStacks(self, hand): abstract
def compilePlayerRegexs(self): abstract
"""Compile dynamic regexes -- these explicitly match known player names and must be updated if a new player joins"""
2008-12-06 15:15:41 +01:00
# Needs to return a MatchObject with group names identifying the streets into the Hand object
# so groups are called by street names 'PREFLOP', 'FLOP', 'STREET2' etc
# blinds are done seperately
2008-12-06 15:15:41 +01:00
def markStreets(self, hand): abstract
#Needs to return a list in the format
# ['player1name', 'player2name', ...] where player1name is the sb and player2name is bb,
# addtional players are assumed to post a bb oop
def readBlinds(self, hand): abstract
def readAntes(self, hand): abstract
def readBringIn(self, hand): abstract
def readButton(self, hand): abstract
2008-12-06 15:15:41 +01:00
def readHeroCards(self, hand): abstract
2009-02-25 11:32:12 +01:00
def readPlayerCards(self, hand, street): abstract
2008-12-06 15:15:41 +01:00
def readAction(self, hand, street): abstract
2008-12-09 16:32:37 +01:00
def readCollectPot(self, hand): abstract
def readShownCards(self, hand): abstract
2008-12-06 15:15:41 +01:00
# Some sites don't report the rake. This will be called at the end of the hand after the pot total has been calculated
# an inheriting class can calculate it for the specific site if need be.
def getRake(self, hand):
hand.rake = hand.totalpot - hand.totalcollected # * Decimal('0.05') # probably not quite right
2008-12-06 15:15:41 +01:00
def sanityCheck(self):
"""Check we aren't going to do some stupid things"""
#TODO: the hhbase stuff needs to be in fpdb_import
sane = False
2008-12-06 15:15:41 +01:00
base_w = False
#~ #Check if hhbase exists and is writable
#~ #Note: Will not try to create the base HH directory
#~ if not (os.access(self.hhbase, os.W_OK) and os.path.isdir(self.hhbase)):
#~ print "HH Sanity Check: Directory hhbase '" + self.hhbase + "' doesn't exist or is not writable"
#~ else:
#~ #Check if hhdir exists and is writable
#~ if not os.path.isdir(self.hhdir):
#~ # In first pass, dir may not exist. Attempt to create dir
#~ print "Creating directory: '%s'" % (self.hhdir)
#~ os.mkdir(self.hhdir)
#~ sane = True
#~ elif os.access(self.hhdir, os.W_OK):
#~ sane = True
#~ else:
#~ print "HH Sanity Check: Directory hhdir '" + self.hhdir + "' or its parent directory are not writable"
2008-12-06 15:15:41 +01:00
# Make sure input and output files are different or we'll overwrite the source file
if True: # basically.. I don't know
sane = True
if(self.in_path != '-' and self.out_path == self.in_path):
print "HH Sanity Check: output and input files are the same, check config"
sane = False
2008-12-06 15:15:41 +01:00
return sane
# Functions not necessary to implement in sub class
def setFileType(self, filetype = "text", codepage='utf8'):
2008-12-06 15:15:41 +01:00
self.filetype = filetype
self.codepage = codepage
2008-12-06 15:15:41 +01:00
def splitFileIntoHands(self):
hands = []
self.obs = self.obs.strip()
list = self.re_SplitHands.split(self.obs)
2008-12-06 15:15:41 +01:00
list.pop() #Last entry is empty
for l in list:
# print "'" + l + "'"
hands = hands + [Hand.Hand(self.sitename, self.gametype, l)]
2008-12-06 15:15:41 +01:00
return hands
def readFile(self):
"""open in_path according to self.codepage"""
2008-12-06 15:15:41 +01:00
if(self.filetype == "text"):
if self.in_path == '-':
# read from stdin
2009-02-26 16:36:23 +01:00
logging.debug("Reading stdin with %s" % self.codepage) # is this necessary? or possible? or what?
in_fh = codecs.getreader('cp1252')(sys.stdin)
else:
logging.debug("Opening %s with %s" % (self.in_path, self.codepage))
in_fh = codecs.open(self.in_path, 'r', self.codepage)
self.obs = in_fh.read()
in_fh.close()
2008-12-06 15:15:41 +01:00
elif(self.filetype == "xml"):
try:
doc = xml.dom.minidom.parse(filename)
self.doc = doc
except:
traceback.print_exc(file=sys.stderr)
def getStatus(self):
#TODO: Return a status of true if file processed ok
return True
def getProcessedFile(self):
return self.out_path