2008-08-04 05:44:28 +02:00
#!/usr/bin/python
#Copyright 2008 Steffen Jobbagy-Felso
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package.
#see status.txt for site/games support info
2009-02-07 16:06:48 +01:00
# Standard Library modules
import os # todo: remove this once import_dir is in fpdb_import
2008-08-04 05:44:28 +02:00
import sys
2009-02-07 16:06:48 +01:00
from time import time
import traceback
import math
import datetime
import re
2008-09-16 23:19:50 +02:00
2009-02-07 16:06:48 +01:00
# fpdb/FreePokerTools modules
2008-09-16 23:19:50 +02:00
2009-02-07 16:06:48 +01:00
import fpdb_simple
import fpdb_db
import fpdb_parse_logic
import Configuration
# database interface modules
2008-09-16 23:19:50 +02:00
# Optional database drivers. Each flag is always defined, so code that
# checks it gets False for a missing driver instead of a NameError.
try:
    import MySQLdb
    mysqlLibFound = True
except ImportError:
    mysqlLibFound = False

try:
    import psycopg2
    pgsqlLibFound = True
except ImportError:
    pgsqlLibFound = False
2008-09-16 23:19:50 +02:00
2008-10-08 19:36:08 +02:00
class Importer :
2008-08-04 05:44:28 +02:00
2008-12-06 20:50:40 +01:00
def __init__(self, caller, settings, config):
    """Store the collaborators, reset the bookkeeping and connect to the database.

    caller   -- object that created this importer; kept as self.caller
    settings -- dict of import options; the minPrint and handCount entries
                are given defaults (30 and 0) when absent
    config   -- configuration object; must offer get_import_parameters()
    """
    self.caller = caller
    self.settings = settings
    self.config = config
    self.fdb = None
    self.cursor = None

    # Queues of files and directories to import.
    self.filelist = {}
    self.dirlist = {}
    self.addToDirList = {}
    self.removeFromFileList = {}    # to remove deleted files

    self.monitor = False
    self.updated = {}               # time the last import was run {file: mtime}
    self.lines = None
    self.faobs = None               # file as one big string
    self.pos_in_file = {}           # remembers how far each file has been read

    # Defaults
    import_params = self.config.get_import_parameters()
    self.callHud = import_params.get(" callFpdbHud ")
    for key, default in ((" minPrint ", 30), (" handCount ", 0)):
        self.settings.setdefault(key, default)

    # fpdb_db() sets self.fdb.db, self.fdb.cursor and self.fdb.sql
    self.fdb = fpdb_db.fpdb_db()
    self.fdb.do_connect(self.config)
2008-12-06 20:50:40 +01:00
#Set functions
def setCallHud(self, value):
    """Set the flag that decides whether stored hand ids are written to the HUD pipe."""
    self.callHud = value
def setMinPrint(self, value):
    """Store value, converted to int, as the minPrint setting."""
    interval = int(value)
    self.settings[' minPrint '] = interval
def setHandCount(self, value):
    """Store value, converted to int, as the handCount setting."""
    limit = int(value)
    self.settings[' handCount '] = limit
def setQuiet(self, value):
    """Store value unchanged as the quiet setting."""
    options = self.settings
    options[' quiet '] = value
def setFailOnError(self, value):
    """Store value unchanged as the failOnError setting."""
    options = self.settings
    options[' failOnError '] = value
2009-02-07 16:06:48 +01:00
def setHandsInDB(self, value):
    """Store value unchanged as the handsInDB setting."""
    options = self.settings
    options[' handsInDB '] = value
def setThreads(self, value):
    """Store value unchanged as the threads setting."""
    options = self.settings
    options[' threads '] = value
def setDropIndexes(self, value):
    """Store value unchanged as the dropIndexes setting."""
    options = self.settings
    options[' dropIndexes '] = value
2008-12-06 20:50:40 +01:00
# def setWatchTime(self):
# self.updated = time()
def clearFileList(self):
    """Forget every queued file by binding self.filelist to a new empty dict."""
    self.filelist = dict()
#Add an individual file to filelist
def addImportFile(self, filename, site=" default ", filter=" passthrough "):
    """Queue one file for import, remembering its site and filter.

    The entry is stored as self.filelist[filename] = [site, filter]; adding
    the same filename again replaces the earlier entry.
    """
    # TODO: test it is a valid file -> put that in config!!
    entry = [site, filter]
    self.filelist[filename] = entry
2009-02-25 15:59:11 +01:00
# Called from GuiBulkImport to add a file or directory.
def addBulkImportImportFileOrDir(self, inputPath, filter=" passthrough "):
    """Queue a file, or every file below a directory, for bulk import.

    inputPath -- path of a single file or of a directory that is walked
                 recursively
    filter    -- filter name recorded for every queued file

    Bulk import never monitors, so only self.addImportFile() is used.
    """
    # TODO: only add sane files?
    if not os.path.isdir(inputPath):
        self.addImportFile(inputPath, site=" default ", filter=filter)
        return

    for dirpath, _dirnames, filenames in os.walk(inputPath):
        for name in filenames:
            # os.walk() dirpaths already begin with inputPath; joining
            # inputPath in front again duplicated it for relative paths.
            self.addImportFile(os.path.join(dirpath, name), site=" default ", filter=filter)
2008-12-06 20:50:40 +01:00
#Add a directory of files to filelist
#Only one import directory per site supported.
#dirlist is a hash of lists:
#dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
def addImportDirectory(self, dir, monitor=False, site=" default ", filter=" passthrough "):
    """Queue every entry of a directory for import; called by GuiAutoImport.

    dir     -- directory whose entries (as returned by os.listdir) are queued
    monitor -- when equal to True, turn monitoring on and remember
               [dir, filter] for this site in self.dirlist
    site    -- site name passed on to addImportFile
    filter  -- filter name passed on to addImportFile

    A path that is not a directory only produces a warning.
    """
    # This should really be using os.walk
    # http://docs.python.org/library/os.html
    if not os.path.isdir(dir):
        print(" Warning: Attempted to add non-directory: ' " + str(dir) + " ' as an import directory ")
        return

    if monitor == True:
        self.monitor = True
        self.dirlist[site] = [dir, filter]

    for entry in os.listdir(dir):
        self.addImportFile(os.path.join(dir, entry), site, filter)
2008-12-06 20:50:40 +01:00
def runImport(self):
    """Run a full import on self.filelist and return the summed counters.

    An 'auto' dropIndexes setting is first resolved with calculate_auto().
    When the resulting mode is 'drop', fpdb_simple.prepareBulkImport() is
    called before and fpdb_simple.afterBulkImport() after the import.
    fpdb_simple.analyzeDB() is called at the end in every case.

    Returns a tuple (stored, duplicates, partial, errors, seconds).
    """
    start = datetime.datetime.now()
    banner = (" started at ", start, " -- ", len(self.filelist),
              " files to import. ", self.settings[' dropIndexes '])
    print(" ".join(str(part) for part in banner))

    if self.settings[' dropIndexes '] == ' auto ':
        self.settings[' dropIndexes '] = self.calculate_auto()
    if self.settings[' dropIndexes '] == ' drop ':
        fpdb_simple.prepareBulkImport(self.fdb)

    totals = [0, 0, 0, 0, 0]    # stored, duplicates, partial, errors, time
    # if threads <= 1: do this bit
    for file in list(self.filelist):
        entry = self.filelist[file]
        result = self.import_file_dict(file, entry[0], entry[1])
        # import_file_dict() yields no counters for directories and unknown
        # filters; unpacking that result used to abort the whole import.
        if not isinstance(result, tuple):
            continue
        for pos, amount in enumerate(result):
            totals[pos] += amount

    if self.settings[' dropIndexes '] == ' drop ':
        fpdb_simple.afterBulkImport(self.fdb)
    fpdb_simple.analyzeDB(self.fdb)
    return tuple(totals)
2009-02-07 16:06:48 +01:00
# else: import threaded
def calculate_auto(self):
    """A heuristic that picks the drop / don't drop index mode for an import.

    Reads the number of queued files and the handsInDB setting. When that
    setting is absent the hands are counted with a query and the result is
    cached in self.settings. If the count cannot be obtained the indexes are
    kept, since dropping them is the riskier choice.

    Returns one of the two mode strings that runImport() compares against.
    """
    if len(self.filelist) == 1:
        return " don ' t drop "

    if ' handsInDB ' not in self.settings:
        try:
            tmpcursor = self.fdb.db.cursor()
            tmpcursor.execute(" Select count(1) from Hands; ")
            self.settings[' handsInDB '] = tmpcursor.fetchone()[0]
        except Exception:
            # Previously the failure was swallowed and the lookup below
            # raised KeyError; without a count, leave the indexes alone.
            return " don ' t drop "

    hands_in_db = self.settings[' handsInDB ']
    if hands_in_db < 5000:
        return " drop "
    if len(self.filelist) < 50:
        return " don ' t drop "
    if hands_in_db > 50000:
        return " don ' t drop "
    return " drop "
2008-12-06 20:50:40 +01:00
#Run import on updated files, then store latest update time.
def runUpdated(self):
    """Import files that are new or were modified since the last run.

    Steps:
      1. re-list every monitored directory so new files get queued;
      2. for each queued file: import it when its mtime is newer than the
         recorded time, or - the first time it is seen - when it is a
         directory or was modified within the last 60 seconds;
      3. register directories that import_file_dict() collected in
         self.addToDirList as monitored directories;
      4. drop files that no longer exist from self.filelist.
    """
    # Check for new files in monitored directories
    # todo: make efficient - always checks for new file, should be able to use mtime of directory
    # ^^ May not work on windows
    for site in list(self.dirlist):
        self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])

    for file in list(self.filelist):
        site = self.filelist[file][0]
        conv_filter = self.filelist[file][1]
        try:
            stat_info = os.stat(file)
        except OSError:
            # File vanished: schedule its removal (this assignment used to
            # lack "self." and raised NameError).
            self.removeFromFileList[file] = True
            continue

        if file in self.updated:
            # Only the lookup decides which branch runs; errors raised by
            # the import itself are no longer mistaken for "first time".
            if stat_info.st_mtime > self.updated[file]:
                self.import_file_dict(file, site, conv_filter)
                self.updated[file] = time()
        else:
            self.updated[file] = time()
            # If modified in the last minute run an immediate import.
            # This codepath only runs first time the file is found.
            if os.path.isdir(file) or (time() - stat_info.st_mtime) < 60:
                # TODO attach a HHC thread to the file
                # TODO import the output of the HHC thread -- this needs to wait for the HHC to block?
                self.import_file_dict(file, site, conv_filter)
            # TODO we also test if directory, why?

    # Explicit loop instead of filter(): the calls are wanted for their side
    # effects, and a lazy filter object would never make them.
    for path in list(self.addToDirList):
        self.addImportDirectory(path, True, self.addToDirList[path][0], self.addToDirList[path][1])

    for file in self.removeFromFileList:
        if file in self.filelist:
            del self.filelist[file]

    self.addToDirList = {}
    self.removeFromFileList = {}
2008-12-06 20:50:40 +01:00
# This is now an internal function that should not be called directly.
def import_file_dict(self, file, site, filter):
    """Import one queued entry; internal, should not be called directly.

    file   -- path of the entry
    site   -- site name; also the sub-directory used for converted files
    filter -- the passthrough filter name (or the blank one) imports the
              file as is; any other value names a converter module that is
              loaded and run first

    A directory is not imported but recorded in self.addToDirList.

    Always returns a tuple (stored, duplicates, partial, errors, seconds);
    directories give all zeros, a failed or unknown conversion counts as one
    error. Callers unpack five values, so no path returns None any more.
    """
    if os.path.isdir(file):
        self.addToDirList[file] = [site] + [filter]
        return (0, 0, 0, 0, 0)

    if filter == " passthrough " or filter == " ":
        result = self.import_fpdb_file(file, site)
    else:
        # Load filter, process file, pass returned filename to import_fpdb_file
        print(" converting %s " % file)
        hhbase = self.config.get_import_parameters().get(" hhArchiveBase ")
        hhbase = os.path.expanduser(hhbase)
        hhdir = os.path.join(hhbase, site)
        base_name = os.path.basename(file)
        try:
            out_path = os.path.join(hhdir, file.split(os.path.sep)[-2] + " - " + base_name)
        except IndexError:
            # No parent directory in the path: fall back to a dated name.
            # (strftime was never imported here, so this path used to fail.)
            stamp = datetime.datetime.now().strftime("%d-%m-%y")
            out_path = os.path.join(hhdir, " x " + stamp + base_name)

        filter_name = filter.replace(" ToFpdb ", " ")
        mod = __import__(filter)
        obj = getattr(mod, filter_name, None)
        if not callable(obj):
            print(" Unknown filter filter_name: ' %s ' in filter: ' %s ' " % (filter_name, filter))
            return (0, 0, 0, 1, 0)

        conv = obj(in_path=file, out_path=out_path)
        if not conv.getStatus():
            # conversion didn't work
            # TODO: appropriate response?
            return (0, 0, 0, 1, 0)
        result = self.import_fpdb_file(out_path, site)

    # Guard against a non-tuple result from the file importer.
    if not isinstance(result, tuple):
        return (0, 0, 0, 0, 0)
    return result
2008-12-06 20:50:40 +01:00
def import_fpdb_file(self, file, site):
    """Parse the unread part of one hand history file and store its hands.

    file -- path of the file, or the stdin marker to read sys.stdin
    site -- site name from the caller; replaced by the site recognised from
            the first line once the file has been read

    Reading resumes at the offset remembered in self.pos_in_file. The lines
    are cut into hands at lines shorter than two characters, and every hand
    is counted as stored, duplicate, partial or error. Stored hand ids are
    written to the HUD pipe when self.callHud is set.

    Returns a tuple (stored, duplicates, partial, errors, seconds). A file
    that no longer exists is scheduled for removal and reported as one error.
    Re-raises the parser error when the failOnError setting is true, and
    calls sys.exit(0) once the handCount setting has been reached.
    """
    def _say(*parts):
        # Same output as a print statement with comma-separated items.
        print(" ".join(str(part) for part in parts))

    starttime = time()
    loc = 0
    if file == " stdin ":
        inputFile = sys.stdin
    else:
        if not os.path.exists(file):
            self.removeFromFileList[file] = True
            return (0, 0, 0, 1, 0)
        # The mode must not contain blanks; open() rejects such a string.
        inputFile = open(file, "rU")
        loc = self.pos_in_file.get(file, 0)

    # Read input file into class and close file - also when reading fails.
    try:
        inputFile.seek(loc)
        self.lines = fpdb_simple.removeTrailingEOL(inputFile.readlines())
        self.pos_in_file[file] = inputFile.tell()
    finally:
        inputFile.close()

    # sometimes we seem to be getting an empty self.lines, in which case, we just want to return.
    if not self.lines:
        print(" DEBUG: import_fpdb_file: failed on self.lines[0]: ' %s ' ' %s ' ' %s ' ' %s ' " % (file, site, self.lines, loc))
        return (0, 0, 0, 1, 0)
    firstline = self.lines[0]

    if firstline.find(" Tournament Summary ") != -1:
        print(" TODO: implement importing tournament summaries ")
        #self.faobs = readfile(inputFile)
        #self.parseTourneyHistory()
        # Callers unpack five counters; a bare 0 made them crash.
        return (0, 0, 0, 0, 0)

    site = fpdb_simple.recogniseSite(firstline)
    category = fpdb_simple.recogniseCategory(firstline)

    startpos = 0
    stored = 0      # counter
    duplicates = 0  # counter
    partial = 0     # counter
    errors = 0      # counter
    handsId = 0
    # .get(): a missing setting must not raise KeyError inside an error handler.
    fail_on_error = self.settings.get(' failOnError ')

    # main loop, cuts the lines into hands and calls the parser for each one
    for endpos, text in enumerate(self.lines):
        if len(text) >= 2:
            continue

        hand = self.lines[startpos:endpos]
        # Drop the separator line left over from the previous hand; an empty
        # slice (separator on the very first line) is left alone.
        if hand and len(hand[0]) < 2:
            hand = hand[1:]

        cancelled = False
        damaged = False     # never set at present; kept for the diagnostic below
        if site == " ftp ":
            # while-loop with its own index: lines inserted below are visited
            # too, and the outer loop variable is no longer reused.
            idx = 0
            while idx < len(hand):
                if hand[idx].endswith(" has been canceled "):   # this is their typo. this is a typo, right?
                    cancelled = True

                #FTP generates lines looking like:
                #Seat 1: IOS Seat 2: kashman59 (big blind) showed [8c 9d] and won ($3.25) with a pair of Eights
                #ie. Seat X multiple times on the same line in the summary section, when a new player sits down in the
                #middle of the hand.
                #TODO: Deal with this properly, either fix the file or make the parsing code work with this line.
                if " Seat " in hand[idx]:
                    mo = re.search(" Seat [0-9]+: ", hand[idx])
                    if mo:
                        _say(" mo= ", mo, " \n mo.start= ", mo.start(), " \n hand[i]= ", hand[idx])
                        hand.insert(idx + 1, hand[idx][mo.start() + 1:])
                        hand[idx] = hand[idx][0:mo.start()]
                idx += 1

        if len(hand) < 3:
            pass
            #todo: the above 2 lines are kind of a dirty hack, the mentioned circumstances should be handled elsewhere but that doesnt work with DOS/Win EOL. actually this doesnt work.
        elif hand[0].endswith(" (partial) "):   # partial hand - do nothing
            partial += 1
        elif not any(" Seat " in row for row in hand[1:4]):
            # Slice instead of hand[1], hand[2], hand[3]: a three-line hand
            # has no hand[3] and used to raise IndexError here.
            partial += 1
        elif cancelled or damaged:
            partial += 1
            if damaged:
                print("""
DEBUG : Partial hand triggered by a line containing ' Seat X: ' twice . This is a
bug in the FTP software when a player sits down in the middle of a hand .
Adding a newline after the player name will fix the issue
""")
                print(" File: %s " % (file))
                print(" Line: %s " % (startpos))
        else:   # normal processing
            isTourney = fpdb_simple.isTourney(hand[0])
            if not isTourney:
                hand = fpdb_simple.filterAnteBlindFold(site, hand)
            self.hand = hand

            try:
                handsId = fpdb_parse_logic.mainParser(self.settings[' db-backend '], self.fdb.db,
                                                      self.fdb.cursor, site, category, hand, self.config)
                self.fdb.db.commit()

                stored += 1
                if self.callHud:
                    #pipe the Hands.id out to the HUD
                    self.caller.pipe_to_hud.stdin.write(" %s " % (handsId) + os.linesep)
            except fpdb_simple.DuplicateError:
                duplicates += 1
            except ValueError:
                errors += 1
                self.printEmailErrorMessage(errors, file, hand)
                if fail_on_error:
                    self.fdb.db.commit()    # dont remove this, in case hand processing was cancelled.
                    raise
            except fpdb_simple.FpdbError:
                errors += 1
                self.printEmailErrorMessage(errors, file, hand)
                #fe.printStackTrace() #todo: get stacktrace
                self.fdb.db.rollback()
                if fail_on_error:
                    self.fdb.db.commit()    # dont remove this, in case hand processing was cancelled.
                    raise

            handled = stored + duplicates + partial + errors
            if self.settings[' minPrint ']:
                if not (handled % self.settings[' minPrint ']):
                    _say(" stored: ", stored, " duplicates: ", duplicates, " partial: ", partial, " errors: ", errors)

            if self.settings[' handCount ']:
                if handled >= self.settings[' handCount ']:
                    if not self.settings.get(' quiet '):
                        print(" quitting due to reaching the amount of hands to be imported ")
                        _say(" Total stored: ", stored, " duplicates: ", duplicates, " partial/damaged: ", partial,
                             " errors: ", errors, " time: ", (time() - starttime))
                    sys.exit(0)

        startpos = endpos

    ttime = time() - starttime
    _say(" \r Total stored: ", stored, " duplicates: ", duplicates, " partial: ", partial, " errors: ", errors, " time: ", ttime)

    if not stored:
        if duplicates:
            final_game_line = None
            for row in self.lines:
                if row.find(" Game # ") != -1:
                    final_game_line = row
            # Without any matching line the name used to be unbound.
            if final_game_line is not None:
                handsId = fpdb_simple.parseSiteHandNo(final_game_line)
        else:
            _say(" failed to read a single hand from file: ", inputFile)
            handsId = 0
        #todo: this will cause return of an unstored hand number if the last hand was error or partial
    self.fdb.db.commit()
    self.handsId = handsId
    return (stored, duplicates, partial, errors, ttime)
2008-08-04 05:44:28 +02:00
2008-12-18 23:39:43 +01:00
def parseTourneyHistory(self):
    """Placeholder for the tournament history parser; it only prints a notice."""
    # Find tournament boundaries.
    # print self.foabs
    print(" Tourney history parser stub ")
def printEmailErrorMessage(self, errors, filename, line):
    """Report a hand that failed to import and append it to hand-errors.txt.

    errors   -- running error number shown in the message
    filename -- name of the file being imported, shown in the message
    line     -- not used; the hand is taken from self.hand

    Prints the current traceback to stderr, a request to send in the hand,
    and the first line of self.hand. Then every line of self.hand, followed
    by a separator, is appended to the log file in the working directory.
    """
    traceback.print_exc(file=sys.stderr)
    print(" ".join((" Error No. ", str(errors),
                    " , please send the hand causing this to steffen@sycamoretest.info so I can fix it. ")))
    print(" ".join((" Filename: ", str(filename))))
    print(" Here is the first line so you can identify it. Please mention that the error was a ValueError: ")
    print(self.hand[0])
    print(" Hand logged to hand-errors.txt ")

    # File name as announced in the message above; the append mode must be
    # exactly 'a' - a mode string padded with blanks is rejected by open().
    logfile = open('hand-errors.txt', 'a')
    try:
        for s in self.hand:
            logfile.write(str(s) + " \n ")
        logfile.write(" \n ")
    finally:
        # Close the log even when a write fails.
        logfile.close()
2008-08-04 05:44:28 +02:00
# Running this module directly only points the user to the command line tool.
# The guard must compare against exactly "__main__"; with blanks inside the
# literal the comparison could never be true.
if __name__ == "__main__":
    print(" CLI for fpdb_import is now available as CliFpdb.py ")