2008-11-07 09:47:00 +01:00
#!/usr/bin/python
#Copyright 2008 Carl Gherardi
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package.
2008-12-14 20:25:04 +01:00
import Hand
2008-11-09 12:57:58 +01:00
import re
2008-11-09 01:46:14 +01:00
import sys
import traceback
2009-03-04 17:46:01 +01:00
import logging
2009-02-26 01:59:36 +01:00
from optparse import OptionParser
2008-11-09 03:58:46 +01:00
import os
import os . path
2008-11-09 01:46:14 +01:00
import xml . dom . minidom
2008-12-10 17:30:57 +01:00
import codecs
2008-12-05 03:40:04 +01:00
from decimal import Decimal
import operator
2008-11-09 01:46:14 +01:00
from xml . dom . minidom import Node
2008-12-17 00:23:33 +01:00
import time
import datetime
2008-12-10 00:30:58 +01:00
2008-12-16 00:56:19 +01:00
import gettext
2009-07-15 01:26:53 +02:00
# Install gettext's _() translation function into the builtins namespace
# for the message domain named in the call below.
gettext . install ( ' fpdb ' )
2008-12-16 00:56:19 +01:00
2009-03-12 13:24:23 +01:00
class HandHistoryConverter():
    """Base class for site-specific hand history converters.

    The converter reads raw hand history text from ``in_path`` (``'-'`` means
    stdin), splits it into individual hands, builds a ``Hand`` object for each
    supported hand and writes it to ``out_path`` (``'-'`` means stdout).

    Subclasses are expected to provide:
      * ``re_SplitHands``      -- regex (string or compiled) separating hands
      * ``re_TailSplitHands``  -- compiled regex with a group around the
                                  splitter, used in follow (tail) mode
      * the ``read*`` / ``determineGameType`` / ``markStreets`` parse actions
        declared below, which raise NotImplementedError here.
    """

    READ_CHUNK_SIZE = 10000  # bytes to read at a time from file (in tail mode)

    def __init__(self, in_path='-', out_path='-', sitename=None, follow=False, index=0):
        """Set up input/output for a conversion run.

        in_path  -- file to read, or '-' for stdin
        out_path -- file to write, or '-' for stdout; missing parent
                    directories are created
        sitename -- name of the poker site, passed on to the Hand objects
        follow   -- if true, start() tails in_path instead of reading it once
        index    -- offset in in_path at which readFile() starts reading
        """
        logging.info("HandHistory init")

        # default filetype and codepage. Subclasses should set these properly.
        self.filetype = "text"
        self.codepage = "utf8"
        # Honour the caller-supplied offset (it used to be reset to 0 here).
        self.index = index

        self.in_path = in_path
        self.out_path = out_path
        self.processedHands = []

        if in_path == '-':
            self.in_fh = sys.stdin

        if out_path == '-':
            self.out_fh = sys.stdout
        elif out_path == in_path:
            # Opening the output for writing would truncate the input before
            # sanityCheck() gets a chance to refuse the run, so don't open it.
            logging.error("Output and input files are the same ('%s'); not opening output" % out_path)
            self.out_fh = None
        else:
            # TODO: out_path should be sanity checked.
            out_dir = os.path.dirname(self.out_path)
            # dirname is '' for a bare filename; makedirs('') would raise.
            if out_dir and not os.path.isdir(out_dir):
                logging.info("Creating directory '%s'" % out_dir)
                os.makedirs(out_dir)
            self.out_fh = open(self.out_path, 'w')

        self.sitename = sitename
        self.follow = follow
        self.compiledPlayers = set()
        self.maxseats = 10

    def __str__(self):
        """Return a short multi-line summary of the converter's configuration."""
        summary = {
            'sitename': self.sitename,
            'filetype': self.filetype,
            'in_path': self.in_path,
            'out_path': self.out_path,
        }
        return """
HandHistoryConverter: '%(sitename)s'
    filetype: '%(filetype)s'
    in_path: '%(in_path)s'
    out_path: '%(out_path)s'
    """ % summary

    def start(self):
        """Process a hand at a time from the input specified by in_path.

        If in follow mode, wait for more data to turn up.
        Otherwise, finish at eof.

        In non-follow mode the result of every processHand() call (a Hand, or
        None for an unsupported hand) is appended to self.processedHands.
        The output file is closed when processing ends, even on error.
        """
        starttime = time.time()
        if not self.sanityCheck():
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("Cowardly refusing to continue after failed sanity check")
            return

        try:
            if self.follow:
                numHands = 0
                for handText in self.tailHands():
                    numHands += 1
                    self.processHand(handText)
            else:
                # 'or []' guards against overrides that still return None.
                handsList = self.allHandsAsList() or []
                logging.info("Parsing %d hands" % len(handsList))
                for handText in handsList:
                    self.processedHands.append(self.processHand(handText))
                numHands = len(handsList)
            endtime = time.time()
            print("read %d hands in %.3f seconds" % (numHands, endtime - starttime))
        finally:
            if self.out_fh is not None and self.out_fh != sys.stdout:
                self.out_fh.close()

    def tailHands(self):
        """Generator of handTexts from a tailed file.

        Tail the in_path file and yield handTexts separated by
        self.re_TailSplitHands. This requires a compiled regex that greedily
        groups and matches the 'splitter' between hands -- see e.g.
        Everleaf.py. Yields nothing when in_path is '-' (stdin cannot be
        tailed). Otherwise the generator never finishes on its own; the file
        is closed when the generator is closed or garbage collected.
        """
        if self.in_path == '-':
            # A bare return ends the generator; 'raise StopIteration' inside a
            # generator is a RuntimeError on modern Python (PEP 479).
            return

        interval = 1.0  # seconds to sleep between reads for new data
        fd = codecs.open(self.in_path, 'r', self.codepage)
        data = ''
        try:
            while True:
                where = fd.tell()
                newdata = fd.read(self.READ_CHUNK_SIZE)
                if not newdata:
                    # No new data: either wait, or reopen if the path now
                    # refers to a different inode (index 1 of a stat result).
                    fd_results = os.fstat(fd.fileno())
                    try:
                        st_results = os.stat(self.in_path)
                    except OSError:
                        st_results = fd_results
                    if st_results[1] == fd_results[1]:
                        time.sleep(interval)
                        fd.seek(where)
                    else:
                        logging.debug("%s changed inode numbers from %d to %d"
                                      % (self.in_path, fd_results[1], st_results[1]))
                        fd.close()  # don't leak the handle of the old file
                        fd = codecs.open(self.in_path, 'r', self.codepage)
                        fd.seek(where)
                else:
                    data = data + newdata
                    pieces = self.re_TailSplitHands.split(data)
                    data = ''
                    # re.split with a group around the splitter gives
                    #   [para, splitter, para, splitter, ..., para]
                    # so even indices are paragraphs (possibly '') and odd
                    # indices are splitters. A paragraph followed by a
                    # splitter is complete and is yielded if non-empty; the
                    # final piece may still grow, so it is kept for the next
                    # read.
                    # NOTE(review): a trailing splitter is discarded, so a
                    # splitter that straddles a chunk boundary leaves its
                    # remainder at the head of the next paragraph -- confirm
                    # whether that matters for real splitters.
                    last = len(pieces) - 1
                    for i in range(0, len(pieces), 2):
                        para = pieces[i]
                        if i < last:  # para is followed by a splitter
                            if para:
                                yield para
                        else:
                            data = para  # keep final partial paragraph
        finally:
            fd.close()

    def allHandsAsList(self):
        """Return a list of handtexts in the file at self.in_path.

        The text is stripped and CRLF line endings are normalised before it
        is split on self.re_SplitHands. Returns an empty list when nothing
        was read.
        """
        # TODO: any need for this to be generator? e.g. stars support can email
        # one huge file of all hands in a year. Better to read bit by bit than
        # all at once.
        self.readFile()
        # readFile() does not set self.obs for every filetype.
        obs = getattr(self, 'obs', None)
        if obs is not None:
            obs = obs.strip().replace('\r\n', '\n')
            self.obs = obs
        if not obs:
            logging.info("Read no hands.")
            return []
        return re.split(self.re_SplitHands, obs)

    def processHand(self, handText):
        """Build, write and return the Hand for handText.

        Returns the Hand object when the game type is recognised and listed
        in readSupportedGames(); otherwise logs the problem and returns None.
        """
        gametype = self.determineGameType(handText)
        logging.debug("gametype %s" % gametype)
        hand = None
        if gametype is None:
            gametype = "unmatched"
            # TODO: not ideal, just trying to not error.
            # TODO: Need to count failed hands.
        else:
            # See if gametype is supported.
            base = gametype['base']
            game = [gametype['type'], base, gametype['limitType']]
            if game in self.readSupportedGames():
                if base == 'hold':
                    logging.debug("hand = Hand.HoldemOmahaHand(self, self.sitename, gametype, handtext)")
                    hand = Hand.HoldemOmahaHand(self, self.sitename, gametype, handText)
                elif base == 'stud':
                    hand = Hand.StudHand(self, self.sitename, gametype, handText)
                elif base == 'draw':
                    hand = Hand.DrawHand(self, self.sitename, gametype, handText)

        if hand:
            # uncomment these to calculate some stats
            # print hand
            # hand.stats.getStats(hand)
            hand.writeHand(self.out_fh)
            return hand

        # Logged exactly once, whatever the reason the hand was rejected.
        logging.info("Unsupported game type: %s" % gametype)
        # TODO: pity we don't know the HID at this stage. Log the entire hand?
        # From the log we can deduce that it is the hand after the one before :)
        return None

    # These functions are parse actions that must be overridden by the
    # inheriting class.

    def readSupportedGames(self):
        """Return the supported games as a list of [type, base, limit] lists.

        e.g. [["ring", "hold", "nl"], ["tour", "hold", "nl"]]
        Valid types are specified in docs/tabledesign.html in Gametypes.
        """
        raise NotImplementedError("readSupportedGames must be implemented by the subclass")

    def determineGameType(self, handText):
        """Return a dict describing the game, or None if we fail to get the info.

        Keys/values:
          'type'       in ('ring', 'tour')
          'limitType'  in ('nl', 'cn', 'pl', 'cp', 'fl')
          'base'       in ('hold', 'stud', 'draw')
          'category'   in ('holdem', 'omahahi', 'omahahilo', 'razz', 'studhi',
                           'studhilo', 'fivedraw', '27_1draw', '27_3draw', 'badugi')
          'hilo'       in ('h', 'l', 's')
          'smallBlind' int?
          'bigBlind'   int?
          'smallBet'
          'bigBet'
          'currency'   in ('USD', 'EUR', 'T$', <countrycode>)
        """
        # TODO: which parts are optional/required?
        raise NotImplementedError("determineGameType must be implemented by the subclass")

    def readHandInfo(self, hand):
        """Read any of: HID (hand id), TABLE (table name), SB, BB, GAMETYPE,
        YEAR MON DAY HR MIN SEC (datetime), BUTTON (button seat number)."""
        raise NotImplementedError("readHandInfo must be implemented by the subclass")

    def readPlayerStacks(self, hand):
        """Needs to return a list of lists in the format
        [['seat#', 'player1name', 'stacksize'], ['seat#', 'player2name', 'stacksize'], ...]"""
        raise NotImplementedError("readPlayerStacks must be implemented by the subclass")

    def compilePlayerRegexs(self):
        """Compile dynamic regexes -- these explicitly match known player names
        and must be updated if a new player joins."""
        raise NotImplementedError("compilePlayerRegexs must be implemented by the subclass")

    def markStreets(self, hand):
        """Needs to return a MatchObject with group names identifying the
        streets into the Hand object, so groups are called by street names
        'PREFLOP', 'FLOP', 'STREET2' etc. Blinds are done separately."""
        raise NotImplementedError("markStreets must be implemented by the subclass")

    def readBlinds(self, hand):
        """Needs to return a list in the format ['player1name', 'player2name', ...]
        where player1name is the sb and player2name is bb; additional players
        are assumed to post a bb oop."""
        raise NotImplementedError("readBlinds must be implemented by the subclass")

    def readAntes(self, hand):
        raise NotImplementedError("readAntes must be implemented by the subclass")

    def readBringIn(self, hand):
        raise NotImplementedError("readBringIn must be implemented by the subclass")

    def readButton(self, hand):
        raise NotImplementedError("readButton must be implemented by the subclass")

    def readHeroCards(self, hand):
        raise NotImplementedError("readHeroCards must be implemented by the subclass")

    def readPlayerCards(self, hand, street):
        raise NotImplementedError("readPlayerCards must be implemented by the subclass")

    def readAction(self, hand, street):
        raise NotImplementedError("readAction must be implemented by the subclass")

    def readCollectPot(self, hand):
        raise NotImplementedError("readCollectPot must be implemented by the subclass")

    def readShownCards(self, hand):
        raise NotImplementedError("readShownCards must be implemented by the subclass")

    def readOther(self, hand):
        """Optional hook for site oddities outside the normal HH parsing.

        Some sites do odd stuff that doesn't fall in to the normal HH parsing.
        e.g., FTP doesn't put mixed game info in the HH, but puts it in the
        file name. Override readOther() to clean up those messes.
        """
        pass

    def getRake(self, hand):
        """Set hand.rake to the pot total minus the total collected.

        Some sites don't report the rake. This will be called at the end of
        the hand after the pot total has been calculated; an inheriting class
        can calculate it for the specific site if need be.
        """
        hand.rake = hand.totalpot - hand.totalcollected  # * Decimal('0.05') # probably not quite right

    def sanityCheck(self):
        """Check we aren't going to do some stupid things.

        Returns False (after printing a message) when the output file is the
        input file, since writing would overwrite the source; True otherwise.
        """
        # TODO: the hhbase stuff needs to be in fpdb_import
        if self.in_path != '-' and self.out_path == self.in_path:
            print("HH Sanity Check: output and input files are the same, check config")
            return False
        return True

    # Functions not necessary to implement in sub class

    def setFileType(self, filetype="text", codepage='utf8'):
        """Set the input file type ("text" or "xml") and its codepage."""
        self.filetype = filetype
        self.codepage = codepage

    # This function doesn't appear to be used
    def splitFileIntoHands(self):
        """Split self.obs on self.re_SplitHands and return a list of Hand.Hand."""
        self.obs = self.obs.strip()
        pieces = self.re_SplitHands.split(self.obs)
        # Drop the trailing entry only when it really is empty; after strip()
        # the last entry is normally a genuine hand.
        if pieces and not pieces[-1]:
            pieces.pop()
        return [Hand.Hand(self.sitename, self.gametype, text) for text in pieces]

    def readFile(self):
        """Read in_path according to self.filetype and self.codepage.

        For "text", the decoded content from offset self.index onwards is
        stored in self.obs and self.index is advanced to the end of what was
        read. For "xml", the parsed document is stored in self.doc; parse
        errors are printed to stderr and otherwise ignored.
        """
        if self.filetype == "text":
            if self.in_path == '-':
                # read from stdin
                logging.debug("Reading stdin with %s" % self.codepage)  # is this necessary? or possible? or what?
                # NOTE(review): stdin is decoded as cp1252 regardless of
                # self.codepage -- kept as-is, confirm whether intended.
                in_fh = codecs.getreader('cp1252')(sys.stdin)
                # stdin is read to EOF; it cannot be resumed from an offset
                # and is not ours to close.
                self.obs = in_fh.read()
            else:
                logging.debug("Opening %s with %s" % (self.in_path, self.codepage))
                in_fh = codecs.open(self.in_path, 'r', self.codepage)
                try:
                    in_fh.seek(self.index)
                    self.obs = in_fh.read()
                    self.index = in_fh.tell()
                finally:
                    in_fh.close()
        elif self.filetype == "xml":
            try:
                # The original referenced an undefined name here, so parsing
                # could never succeed.
                self.doc = xml.dom.minidom.parse(self.in_path)
            except Exception:
                traceback.print_exc(file=sys.stderr)

    def guessMaxSeats(self, hand):
        """Return a guess at max_seats when not specified in HH."""
        mo = self.maxOccSeat(hand)
        if mo == 10:
            return 10  # that was easy

        base = hand.gametype['base']
        if base == 'stud':
            if mo <= 8:
                return 8
            return mo
        if base == 'draw':
            if mo <= 6:
                return 6
            return mo

        if mo == 2:
            return 2
        if mo <= 6:
            return 6
        return 10

    def maxOccSeat(self, hand):
        """Return the highest seat number (player[0]) in hand.players, or 0."""
        highest = 0
        for player in hand.players:
            if player[0] > highest:
                highest = player[0]
        return highest

    def getStatus(self):
        # TODO: Return a status of true if file processed ok
        return True

    def getProcessedHands(self):
        """Return the list of results collected by start() in non-follow mode."""
        return self.processedHands

    def getProcessedFile(self):
        """Return the path the converted hands were written to."""
        return self.out_path

    def getLastCharacterRead(self):
        """Return the offset in in_path up to which readFile() has read."""
        return self.index