2008-08-04 05:44:28 +02:00
#!/usr/bin/python
#Copyright 2008 Steffen Jobbagy-Felso
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package.
#see status.txt for site/games support info
2009-02-07 16:06:48 +01:00
# Standard Library modules
import os # todo: remove this once import_dir is in fpdb_import
2008-08-04 05:44:28 +02:00
import sys
2009-02-07 16:06:48 +01:00
from time import time
import traceback
import math
import datetime
import re
2008-09-16 23:19:50 +02:00
2009-02-07 16:06:48 +01:00
# fpdb/FreePokerTools modules
2008-09-16 23:19:50 +02:00
2009-02-07 16:06:48 +01:00
import fpdb_simple
import fpdb_db
import fpdb_parse_logic
import Configuration
2009-02-11 08:40:33 +01:00
import EverleafToFpdb
2009-02-22 04:42:16 +01:00
import FulltiltToFpdb
2009-02-07 16:06:48 +01:00
# database interface modules
2008-09-16 23:19:50 +02:00
# Optional database drivers. Each flag records whether the corresponding
# driver module could be imported; it is always defined (True or False) so
# that reading it never raises NameError when the driver is absent.
try:
    import MySQLdb
    mysqlLibFound = True
except ImportError:
    mysqlLibFound = False

try:
    import psycopg2
    pgsqlLibFound = True
except ImportError:
    pgsqlLibFound = False
2008-09-16 23:19:50 +02:00
2008-10-08 19:36:08 +02:00
class Importer :
2008-08-04 05:44:28 +02:00
2008-12-06 20:50:40 +01:00
def __init__ ( self , caller , settings , config ) :
""" Constructor """
self . settings = settings
self . caller = caller
self . config = config
2008-12-11 00:22:47 +01:00
self . fdb = None
2008-12-06 20:50:40 +01:00
self . cursor = None
self . filelist = { }
self . dirlist = { }
2009-02-19 11:38:51 +01:00
self . addToDirList = { }
2009-02-26 04:44:03 +01:00
self . removeFromFileList = { } # to remove deleted files
2008-12-06 20:50:40 +01:00
self . monitor = False
self . updated = { } #Time last import was run {file:mtime}
self . lines = None
self . faobs = None #File as one big string
self . pos_in_file = { } # dict to remember how far we have read in the file
#Set defaults
self . callHud = self . config . get_import_parameters ( ) . get ( " callFpdbHud " )
2009-01-28 09:46:43 +01:00
if ' minPrint ' not in self . settings :
2008-12-12 12:25:26 +01:00
#TODO: Is this value in the xml file?
2008-12-06 20:50:40 +01:00
self . settings [ ' minPrint ' ] = 30
2009-01-28 09:46:43 +01:00
if ' handCount ' not in self . settings :
2008-12-12 12:25:26 +01:00
#TODO: Is this value in the xml file?
self . settings [ ' handCount ' ] = 0
2008-12-11 00:22:47 +01:00
self . fdb = fpdb_db . fpdb_db ( ) # sets self.fdb.db self.fdb.cursor and self.fdb.sql
2009-01-03 18:59:48 +01:00
self . fdb . do_connect ( self . config )
2008-12-06 20:50:40 +01:00
#Set functions
def setCallHud ( self , value ) :
self . callHud = value
def setMinPrint ( self , value ) :
self . settings [ ' minPrint ' ] = int ( value )
def setHandCount ( self , value ) :
self . settings [ ' handCount ' ] = int ( value )
def setQuiet ( self , value ) :
self . settings [ ' quiet ' ] = value
def setFailOnError ( self , value ) :
self . settings [ ' failOnError ' ] = value
2009-02-07 16:06:48 +01:00
def setHandsInDB ( self , value ) :
self . settings [ ' handsInDB ' ] = value
def setThreads ( self , value ) :
self . settings [ ' threads ' ] = value
def setDropIndexes ( self , value ) :
self . settings [ ' dropIndexes ' ] = value
2008-12-06 20:50:40 +01:00
# def setWatchTime(self):
# self.updated = time()
def clearFileList ( self ) :
self . filelist = { }
#Add an individual file to filelist
def addImportFile ( self , filename , site = " default " , filter = " passthrough " ) :
2009-02-07 16:06:48 +01:00
#TODO: test it is a valid file -> put that in config!!
2008-12-06 20:50:40 +01:00
self . filelist [ filename ] = [ site ] + [ filter ]
2009-02-25 15:59:11 +01:00
# Called from GuiBulkImport to add a file or directory.
def addBulkImportImportFileOrDir ( self , inputPath , filter = " passthrough " ) :
""" Add a file or directory for bulk import """
# Bulk import never monitors
# if directory, add all files in it. Otherwise add single file.
# TODO: only add sane files?
if os . path . isdir ( inputPath ) :
for subdir in os . walk ( inputPath ) :
for file in subdir [ 2 ] :
2009-02-25 16:16:18 +01:00
self . addImportFile ( os . path . join ( inputPath , subdir [ 0 ] , file ) , site = " default " , filter = filter )
2009-02-25 15:59:11 +01:00
else :
self . addImportFile ( inputPath , site = " default " , filter = filter )
2008-12-06 20:50:40 +01:00
#Add a directory of files to filelist
#Only one import directory per site supported.
#dirlist is a hash of lists:
#dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
def addImportDirectory ( self , dir , monitor = False , site = " default " , filter = " passthrough " ) :
2009-02-24 14:46:05 +01:00
#This should really be using os.walk
#http://docs.python.org/library/os.html
2008-12-06 20:50:40 +01:00
if os . path . isdir ( dir ) :
if monitor == True :
self . monitor = True
self . dirlist [ site ] = [ dir ] + [ filter ]
for file in os . listdir ( dir ) :
self . addImportFile ( os . path . join ( dir , file ) , site , filter )
else :
2009-01-28 09:46:43 +01:00
print " Warning: Attempted to add non-directory: ' " + str ( dir ) + " ' as an import directory "
2008-12-06 20:50:40 +01:00
def runImport ( self ) :
2009-02-07 16:06:48 +01:00
""" " Run full import on self.filelist. """
start = datetime . datetime . now ( )
print " started at " , start , " -- " , len ( self . filelist ) , " files to import. " , self . settings [ ' dropIndexes ' ]
if self . settings [ ' dropIndexes ' ] == ' auto ' :
self . settings [ ' dropIndexes ' ] = self . calculate_auto ( )
if self . settings [ ' dropIndexes ' ] == ' drop ' :
fpdb_simple . prepareBulkImport ( self . fdb )
2009-01-16 18:24:00 +01:00
totstored = 0
totdups = 0
totpartial = 0
toterrors = 0
tottime = 0
2009-02-07 16:06:48 +01:00
# if threads <= 1: do this bit
2008-12-06 20:50:40 +01:00
for file in self . filelist :
2009-01-16 18:24:00 +01:00
( stored , duplicates , partial , errors , ttime ) = self . import_file_dict ( file , self . filelist [ file ] [ 0 ] , self . filelist [ file ] [ 1 ] )
totstored + = stored
totdups + = duplicates
totpartial + = partial
toterrors + = errors
tottime + = ttime
2009-02-07 16:06:48 +01:00
if self . settings [ ' dropIndexes ' ] == ' drop ' :
fpdb_simple . afterBulkImport ( self . fdb )
2008-12-14 03:30:19 +01:00
fpdb_simple . analyzeDB ( self . fdb )
2009-01-16 18:24:00 +01:00
return ( totstored , totdups , totpartial , toterrors , tottime )
2009-02-07 16:06:48 +01:00
# else: import threaded
def calculate_auto ( self ) :
""" An heuristic to determine a reasonable value of drop/don ' t drop """
if len ( self . filelist ) == 1 : return " don ' t drop "
if self . settings [ ' handsInDB ' ] < 5000 : return " drop "
if len ( self . filelist ) < 50 : return " don ' t drop "
if self . settings [ ' handsInDB ' ] > 50000 : return " don ' t drop "
return " drop "
2008-12-06 20:50:40 +01:00
#Run import on updated files, then store latest update time.
def runUpdated ( self ) :
#Check for new files in directory
#todo: make efficient - always checks for new file, should be able to use mtime of directory
# ^^ May not work on windows
for site in self . dirlist :
self . addImportDirectory ( self . dirlist [ site ] [ 0 ] , False , site , self . dirlist [ site ] [ 1 ] )
for file in self . filelist :
stat_info = os . stat ( file )
try :
lastupdate = self . updated [ file ]
if stat_info . st_mtime > lastupdate :
self . import_file_dict ( file , self . filelist [ file ] [ 0 ] , self . filelist [ file ] [ 1 ] )
self . updated [ file ] = time ( )
except :
self . updated [ file ] = time ( )
# This codepath only runs first time the file is found, if modified in the last
# minute run an immediate import.
2009-02-19 11:38:51 +01:00
if ( time ( ) - stat_info . st_mtime ) < 60 or os . path . isdir ( file ) : # TODO: figure out a way to dispatch this to the seperate thread so our main window doesn't lock up on initial import
2008-12-06 20:50:40 +01:00
self . import_file_dict ( file , self . filelist [ file ] [ 0 ] , self . filelist [ file ] [ 1 ] )
2009-02-19 11:38:51 +01:00
for dir in self . addToDirList :
self . addImportDirectory ( dir , True , self . addToDirList [ dir ] [ 0 ] , self . addToDirList [ dir ] [ 1 ] )
2009-02-26 04:44:03 +01:00
for file in self . removeFromFileList :
2009-02-26 05:17:36 +01:00
if file in self . filelist :
del self . filelist [ file ]
2009-02-19 11:38:51 +01:00
self . addToDirList = { }
2009-02-26 04:44:03 +01:00
self . removeFromFileList = { }
2008-12-06 20:50:40 +01:00
# This is now an internal function that should not be called directly.
def import_file_dict ( self , file , site , filter ) :
2009-02-19 11:38:51 +01:00
if os . path . isdir ( file ) :
self . addToDirList [ file ] = [ site ] + [ filter ]
return
if filter == " passthrough " or filter == " " :
2009-01-16 18:24:00 +01:00
( stored , duplicates , partial , errors , ttime ) = self . import_fpdb_file ( file , site )
2008-12-06 20:50:40 +01:00
else :
2009-02-05 10:28:18 +01:00
conv = None
# Load filter, process file, pass returned filename to import_fpdb_file
2009-02-19 11:38:51 +01:00
# TODO: Shouldn't we be able to use some sort of lambda or something to just call a Python object by whatever name we specify? then we don't have to hardcode them,
# someone can just create their own python module for it
if filter == " EverleafToFpdb " :
print " converting " , file
conv = EverleafToFpdb . Everleaf ( self . config , file )
2009-02-20 08:06:22 +01:00
elif filter == " FulltiltToFpdb " :
print " converting " , file
2009-02-22 04:42:16 +01:00
conv = FulltiltToFpdb . FullTilt ( self . config , file )
2009-02-19 11:38:51 +01:00
else :
print " Unknown filter " , filter
return
2009-02-05 10:28:18 +01:00
2009-02-05 10:50:50 +01:00
supp = conv . readSupportedGames ( ) # Should this be done by HHC on init?
2009-02-19 14:10:31 +01:00
#gt = conv.determineGameType()
2009-02-05 10:50:50 +01:00
# TODO: Check that gt is in supp - error appropriately if not
2009-02-05 10:28:18 +01:00
conv . processFile ( )
if ( conv . getStatus ( ) ) :
2009-02-19 14:10:31 +01:00
( stored , duplicates , partial , errors , ttime ) = self . import_fpdb_file ( conv . getProcessedFile ( ) , site )
2009-02-20 17:29:52 +01:00
else :
# conversion didn't work
# TODO: appropriate response?
return ( 0 , 0 , 0 , 1 , 0 )
2009-02-05 10:28:18 +01:00
#This will barf if conv.getStatus != True
2009-01-16 18:24:00 +01:00
return ( stored , duplicates , partial , errors , ttime )
2008-12-06 20:50:40 +01:00
def import_fpdb_file ( self , file , site ) :
starttime = time ( )
last_read_hand = 0
loc = 0
if ( file == " stdin " ) :
inputFile = sys . stdin
else :
2009-02-26 04:44:03 +01:00
if os . path . exists ( file ) :
inputFile = open ( file , " rU " )
else :
2009-02-26 05:17:36 +01:00
self . removeFromFileList [ file ] = True
return ( 0 , 0 , 0 , 1 , 0 )
2009-02-25 18:40:39 +01:00
try :
loc = self . pos_in_file [ file ]
2009-02-26 04:44:03 +01:00
except :
pass
2008-12-06 20:50:40 +01:00
# Read input file into class and close file
inputFile . seek ( loc )
self . lines = fpdb_simple . removeTrailingEOL ( inputFile . readlines ( ) )
self . pos_in_file [ file ] = inputFile . tell ( )
inputFile . close ( )
try : # sometimes we seem to be getting an empty self.lines, in which case, we just want to return.
firstline = self . lines [ 0 ]
except :
# print "import_fpdb_file", file, site, self.lines, "\n"
2009-01-19 03:32:34 +01:00
return ( 0 , 0 , 0 , 1 , 0 )
2008-12-06 20:50:40 +01:00
if firstline . find ( " Tournament Summary " ) != - 1 :
print " TODO: implement importing tournament summaries "
#self.faobs = readfile(inputFile)
#self.parseTourneyHistory()
return 0
site = fpdb_simple . recogniseSite ( firstline )
category = fpdb_simple . recogniseCategory ( firstline )
startpos = 0
stored = 0 #counter
duplicates = 0 #counter
partial = 0 #counter
errors = 0 #counter
for i in range ( len ( self . lines ) ) : #main loop, iterates through the lines of a file and calls the appropriate parser method
if ( len ( self . lines [ i ] ) < 2 ) :
endpos = i
hand = self . lines [ startpos : endpos ]
if ( len ( hand [ 0 ] ) < 2 ) :
hand = hand [ 1 : ]
cancelled = False
damaged = False
if ( site == " ftp " ) :
for i in range ( len ( hand ) ) :
if ( hand [ i ] . endswith ( " has been canceled " ) ) : #this is their typo. this is a typo, right?
cancelled = True
2009-01-16 16:24:01 +01:00
#FTP generates lines looking like:
#Seat 1: IOS Seat 2: kashman59 (big blind) showed [8c 9d] and won ($3.25) with a pair of Eights
#ie. Seat X multiple times on the same line in the summary section, when a new player sits down in the
#middle of the hand.
#TODO: Deal with this properly, either fix the file or make the parsing code work with this line.
2009-01-28 09:46:43 +01:00
if " Seat " in hand [ i ] :
mo = re . search ( " Seat [0-9]+: " , hand [ i ] )
if mo :
print " mo= " , mo , " \n mo.start= " , mo . start ( ) , " \n hand[i]= " , hand [ i ]
hand . insert ( i + 1 , hand [ i ] [ mo . start ( ) + 1 : ] )
hand [ i ] = hand [ i ] [ 0 : mo . start ( ) ]
2008-12-06 20:50:40 +01:00
if ( len ( hand ) < 3 ) :
pass
#todo: the above 2 lines are kind of a dirty hack, the mentioned circumstances should be handled elsewhere but that doesnt work with DOS/Win EOL. actually this doesnt work.
elif ( hand [ 0 ] . endswith ( " (partial) " ) ) : #partial hand - do nothing
partial + = 1
elif ( hand [ 1 ] . find ( " Seat " ) == - 1 and hand [ 2 ] . find ( " Seat " ) == - 1 and hand [ 3 ] . find ( " Seat " ) == - 1 ) : #todo: should this be or instead of and?
partial + = 1
elif ( cancelled or damaged ) :
partial + = 1
2009-01-16 16:24:01 +01:00
if damaged :
print """
DEBUG : Partial hand triggered by a line containing ' Seat X: ' twice . This is a
bug in the FTP software when a player sits down in the middle of a hand .
Adding a newline after the player name will fix the issue
"""
print " File: %s " % ( file )
print " Line: %s " % ( startpos )
2008-12-06 20:50:40 +01:00
else : #normal processing
isTourney = fpdb_simple . isTourney ( hand [ 0 ] )
if not isTourney :
fpdb_simple . filterAnteBlindFold ( site , hand )
hand = fpdb_simple . filterCrap ( site , hand , isTourney )
self . hand = hand
try :
2008-12-11 00:22:47 +01:00
handsId = fpdb_parse_logic . mainParser ( self . settings [ ' db-backend ' ] , self . fdb . db
, self . fdb . cursor , site , category , hand )
self . fdb . db . commit ( )
2008-12-06 20:50:40 +01:00
stored + = 1
if self . callHud :
#print "call to HUD here. handsId:",handsId
#pipe the Hands.id out to the HUD
self . caller . pipe_to_hud . stdin . write ( " %s " % ( handsId ) + os . linesep )
except fpdb_simple . DuplicateError :
duplicates + = 1
except ( ValueError ) , fe :
errors + = 1
self . printEmailErrorMessage ( errors , file , hand )
if ( self . settings [ ' failOnError ' ] ) :
2008-12-11 00:22:47 +01:00
self . fdb . db . commit ( ) #dont remove this, in case hand processing was cancelled.
2008-12-06 20:50:40 +01:00
raise
except ( fpdb_simple . FpdbError ) , fe :
errors + = 1
self . printEmailErrorMessage ( errors , file , hand )
#fe.printStackTrace() #todo: get stacktrace
2008-12-11 00:22:47 +01:00
self . fdb . db . rollback ( )
2008-12-06 20:50:40 +01:00
if ( self . settings [ ' failOnError ' ] ) :
2008-12-11 00:22:47 +01:00
self . fdb . db . commit ( ) #dont remove this, in case hand processing was cancelled.
2008-12-06 20:50:40 +01:00
raise
if ( self . settings [ ' minPrint ' ] != 0 ) :
if ( ( stored + duplicates + partial + errors ) % self . settings [ ' minPrint ' ] == 0 ) :
print " stored: " , stored , " duplicates: " , duplicates , " partial: " , partial , " errors: " , errors
if ( self . settings [ ' handCount ' ] != 0 ) :
if ( ( stored + duplicates + partial + errors ) > = self . settings [ ' handCount ' ] ) :
if ( not self . settings [ ' quiet ' ] ) :
print " quitting due to reaching the amount of hands to be imported "
print " Total stored: " , stored , " duplicates: " , duplicates , " partial/damaged: " , partial , " errors: " , errors , " time: " , ( time ( ) - starttime )
sys . exit ( 0 )
startpos = endpos
2009-01-16 18:24:00 +01:00
ttime = time ( ) - starttime
2009-02-24 16:54:02 +01:00
print " \r Total stored: " , stored , " duplicates: " , duplicates , " partial: " , partial , " errors: " , errors , " time: " , ttime
2008-12-06 20:50:40 +01:00
if stored == 0 :
if duplicates > 0 :
for line_no in range ( len ( self . lines ) ) :
if self . lines [ line_no ] . find ( " Game # " ) != - 1 :
final_game_line = self . lines [ line_no ]
handsId = fpdb_simple . parseSiteHandNo ( final_game_line )
else :
print " failed to read a single hand from file: " , inputFile
handsId = 0
#todo: this will cause return of an unstored hand number if the last hand was error or partial
2008-12-11 00:22:47 +01:00
self . fdb . db . commit ( )
2008-12-06 20:50:40 +01:00
self . handsId = handsId
2009-01-16 18:24:00 +01:00
return ( stored , duplicates , partial , errors , ttime )
2008-08-04 05:44:28 +02:00
2008-12-18 23:39:43 +01:00
def parseTourneyHistory ( self ) :
print " Tourney history parser stub "
#Find tournament boundaries.
#print self.foabs
def printEmailErrorMessage ( self , errors , filename , line ) :
traceback . print_exc ( file = sys . stderr )
print " Error No. " , errors , " , please send the hand causing this to steffen@sycamoretest.info so I can fix it. "
print " Filename: " , filename
print " Here is the first line so you can identify it. Please mention that the error was a ValueError: "
print self . hand [ 0 ]
print " Hand logged to hand-errors.txt "
logfile = open ( ' hand-errors.txt ' , ' a ' )
for s in self . hand :
logfile . write ( str ( s ) + " \n " )
logfile . write ( " \n " )
logfile . close ( )
2008-08-04 05:44:28 +02:00
if __name__ == " __main__ " :
2008-12-06 20:50:40 +01:00
print " CLI for fpdb_import is now available as CliFpdb.py "