#!/usr/bin/python
#Copyright 2008 Steffen Jobbagy-Felso
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package.
#see status.txt for site/games support info
# Standard Library modules
import os # todo: remove this once import_dir is in fpdb_import
import sys
from time import time, strftime
import traceback
import math
import datetime
import re
# fpdb/FreePokerTools modules
import fpdb_simple
import fpdb_db
import fpdb_parse_logic
import Configuration
import EverleafToFpdb
import FulltiltToFpdb
# database interface modules
try:
import MySQLdb
mysqlLibFound=True
except ImportError:
pass
try:
import psycopg2
pgsqlLibFound=True
except ImportError:
pass
class Importer:
def __init__(self, caller, settings, config):
"""Constructor"""
self.settings=settings
self.caller=caller
self.config = config
self.fdb = None
self.cursor = None
self.filelist = {}
self.dirlist = {}
self.addToDirList = {}
self.removeFromFileList = {} # to remove deleted files
self.monitor = False
self.updated = {} #Time last import was run {file:mtime}
self.lines = None
self.faobs = None #File as one big string
self.pos_in_file = {} # dict to remember how far we have read in the file
#Set defaults
self.callHud = self.config.get_import_parameters().get("callFpdbHud")
if 'minPrint' not in self.settings:
#TODO: Is this value in the xml file?
self.settings['minPrint'] = 30
if 'handCount' not in self.settings:
#TODO: Is this value in the xml file?
self.settings['handCount'] = 0
self.fdb = fpdb_db.fpdb_db() # sets self.fdb.db self.fdb.cursor and self.fdb.sql
self.fdb.do_connect(self.config)
#Set functions
def setCallHud(self, value):
self.callHud = value
def setMinPrint(self, value):
self.settings['minPrint'] = int(value)
def setHandCount(self, value):
self.settings['handCount'] = int(value)
def setQuiet(self, value):
self.settings['quiet'] = value
def setFailOnError(self, value):
self.settings['failOnError'] = value
def setHandsInDB(self, value):
self.settings['handsInDB'] = value
def setThreads(self, value):
self.settings['threads'] = value
def setDropIndexes(self, value):
self.settings['dropIndexes'] = value
# def setWatchTime(self):
# self.updated = time()
def clearFileList(self):
self.filelist = {}
#Add an individual file to filelist
def addImportFile(self, filename, site = "default", filter = "passthrough"):
#TODO: test it is a valid file -> put that in config!!
self.filelist[filename] = [site] + [filter]
# Called from GuiBulkImport to add a file or directory.
def addBulkImportImportFileOrDir(self, inputPath,filter = "passthrough"):
"""Add a file or directory for bulk import"""
# Bulk import never monitors
# if directory, add all files in it. Otherwise add single file.
# TODO: only add sane files?
        if os.path.isdir(inputPath):
            for (dirpath, dirnames, filenames) in os.walk(inputPath):
                for file in filenames:
                    self.addImportFile(os.path.join(dirpath, file), site="default", filter=filter)
else:
self.addImportFile(inputPath, site="default", filter=filter)
#Add a directory of files to filelist
#Only one import directory per site supported.
#dirlist is a hash of lists:
#dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
def addImportDirectory(self,dir,monitor = False, site = "default", filter = "passthrough"):
#gets called by GuiAutoImport.
#This should really be using os.walk
#http://docs.python.org/library/os.html
if os.path.isdir(dir):
if monitor == True:
self.monitor = True
self.dirlist[site] = [dir] + [filter]
for file in os.listdir(dir):
self.addImportFile(os.path.join(dir, file), site, filter)
else:
print "Warning: Attempted to add non-directory: '" + str(dir) + "' as an import directory"
def runImport(self):
""""Run full import on self.filelist."""
start = datetime.datetime.now()
print "started at", start, "--", len(self.filelist), "files to import.", self.settings['dropIndexes']
if self.settings['dropIndexes'] == 'auto':
self.settings['dropIndexes'] = self.calculate_auto()
if self.settings['dropIndexes'] == 'drop':
fpdb_simple.prepareBulkImport(self.fdb)
totstored = 0
totdups = 0
totpartial = 0
toterrors = 0
tottime = 0
# if threads <= 1: do this bit
for file in self.filelist:
(stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
totstored += stored
totdups += duplicates
totpartial += partial
toterrors += errors
tottime += ttime
if self.settings['dropIndexes'] == 'drop':
fpdb_simple.afterBulkImport(self.fdb)
fpdb_simple.analyzeDB(self.fdb)
return (totstored, totdups, totpartial, toterrors, tottime)
# else: import threaded
def calculate_auto(self):
"""An heuristic to determine a reasonable value of drop/don't drop"""
if len(self.filelist) == 1: return "don't drop"
if self.settings['handsInDB'] < 5000: return "drop"
if len(self.filelist) < 50: return "don't drop"
if self.settings['handsInDB'] > 50000: return "don't drop"
return "drop"
#Run import on updated files, then store latest update time.
def runUpdated(self):
#Check for new files in monitored directories
#todo: make efficient - always checks for new file, should be able to use mtime of directory
# ^^ May not work on windows
for site in self.dirlist:
self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])
for file in self.filelist:
stat_info = os.stat(file)
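            # A missing entry in self.updated means the monitor is seeing this file for
            # the first time; the except branch below handles that case.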
try:
lastupdate = self.updated[file]
if stat_info.st_mtime > lastupdate:
self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
self.updated[file] = time()
            except KeyError:
self.updated[file] = time()
# If modified in the last minute run an immediate import.
# This codepath only runs first time the file is found.
if (time() - stat_info.st_mtime) < 60:
# TODO attach a HHC thread to the file
# TODO import the output of the HHC thread -- this needs to wait for the HHC to block?
self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
# TODO we also test if directory, why?
#if os.path.isdir(file):
#self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
for dir in self.addToDirList:
self.addImportDirectory(dir, True, self.addToDirList[dir][0], self.addToDirList[dir][1])
for file in self.removeFromFileList:
if file in self.filelist:
del self.filelist[file]
self.addToDirList = {}
self.removeFromFileList = {}
# This is now an internal function that should not be called directly.
def import_file_dict(self, file, site, filter):
if os.path.isdir(file):
self.addToDirList[file] = [site] + [filter]
            return (0, 0, 0, 0, 0) # directory queued for later; nothing imported from it yet
if filter == "passthrough" or filter == "":
(stored, duplicates, partial, errors, ttime) = self.import_fpdb_file(file, site)
else:
conv = None
            # Load filter, process file, pass returned filename to import_fpdb_file
            # TODO: look the converter class up dynamically by name (getattr on an imported
            #       module, or a registry dict) instead of hardcoding each filter here, so
            #       users can drop in their own converter modules
if filter == "EverleafToFpdb":
print "converting ", file
hhbase = self.config.get_import_parameters().get("hhArchiveBase")
hhbase = os.path.expanduser(hhbase)
hhdir = os.path.join(hhbase,site)
try:
out_path = os.path.join(hhdir, file.split(os.path.sep)[-2]+"-"+os.path.basename(file))
                except IndexError:
out_path = os.path.join(hhdir, "x"+strftime("%d-%m-%y")+os.path.basename(file))
#out_fh = open(ofile, 'w') # TODO: seek to previous place in input and append output
conv = EverleafToFpdb.Everleaf(in_path = file, out_path = out_path)
conv.join()
elif filter == "FulltiltToFpdb":
print "converting ", file
                conv = FulltiltToFpdb.FullTilt(in_fh = file, out_fh = out_fh) # FIXME: out_fh is never defined in this branch; an output path needs to be built as in the Everleaf case above
else:
print "Unknown filter ", filter
                return (0, 0, 0, 1, 0)
supp = conv.readSupportedGames() # Should this be done by HHC on init?
#gt = conv.determineGameType()
# TODO: Check that gt is in supp - error appropriately if not
conv.processFile()
if(conv.getStatus()):
(stored, duplicates, partial, errors, ttime) = self.import_fpdb_file(conv.getProcessedFile(), site)
else:
# conversion didn't work
# TODO: appropriate response?
return (0, 0, 0, 1, 0)
#This will barf if conv.getStatus != True
return (stored, duplicates, partial, errors, ttime)
def import_fpdb_file(self, file, site):
starttime = time()
last_read_hand=0
loc = 0
if (file=="stdin"):
inputFile=sys.stdin
else:
if os.path.exists(file):
inputFile = open(file, "rU")
else:
self.removeFromFileList[file] = True
return (0, 0, 0, 1, 0)
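        # Resume from wherever the previous import of this file stopped, so repeated
        # auto-import runs only parse hands appended since the last pass.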
try:
loc = self.pos_in_file[file]
        except KeyError:
pass
# Read input file into class and close file
inputFile.seek(loc)
self.lines=fpdb_simple.removeTrailingEOL(inputFile.readlines())
self.pos_in_file[file] = inputFile.tell()
inputFile.close()
try: # sometimes we seem to be getting an empty self.lines, in which case, we just want to return.
firstline = self.lines[0]
        except IndexError:
# print "import_fpdb_file", file, site, self.lines, "\n"
return (0,0,0,1,0)
if firstline.find("Tournament Summary")!=-1:
print "TODO: implement importing tournament summaries"
#self.faobs = readfile(inputFile)
#self.parseTourneyHistory()
            return (0, 0, 0, 0, 0) # nothing imported, but return the usual 5-tuple so callers can unpack it
site=fpdb_simple.recogniseSite(firstline)
category=fpdb_simple.recogniseCategory(firstline)
startpos=0
stored=0 #counter
duplicates=0 #counter
partial=0 #counter
errors=0 #counter
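        # Hands in the history are separated by blank (or near-empty) lines, so each time a
        # short line is hit, the slice self.lines[startpos:endpos] is treated as one hand.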
for i in range (len(self.lines)): #main loop, iterates through the lines of a file and calls the appropriate parser method
if (len(self.lines[i])<2):
endpos=i
hand=self.lines[startpos:endpos]
if (len(hand[0])<2):
hand=hand[1:]
cancelled=False
damaged=False
if (site=="ftp"):
for i in range (len(hand)):
if (hand[i].endswith(" has been canceled")): #this is their typo. this is a typo, right?
cancelled=True
#FTP generates lines looking like:
#Seat 1: IOS Seat 2: kashman59 (big blind) showed [8c 9d] and won ($3.25) with a pair of Eights
#ie. Seat X multiple times on the same line in the summary section, when a new player sits down in the
#middle of the hand.
#TODO: Deal with this properly, either fix the file or make the parsing code work with this line.
if "Seat" in hand[i]:
mo = re.search(" Seat [0-9]+: ", hand[i])
if mo:
print "mo=", mo, "\nmo.start=", mo.start(),"\nhand[i]=",hand[i]
hand.insert(i+1, hand[i][mo.start()+1:])
hand[i] = hand[i][0:mo.start()]
if (len(hand)<3):
pass
                    #todo: the above 2 lines are kind of a dirty hack; the mentioned circumstances should be handled elsewhere, but that doesn't work with DOS/Win EOL. Actually this doesn't work.
elif (hand[0].endswith(" (partial)")): #partial hand - do nothing
partial+=1
elif (hand[1].find("Seat")==-1 and hand[2].find("Seat")==-1 and hand[3].find("Seat")==-1):#todo: should this be or instead of and?
partial+=1
elif (cancelled or damaged):
partial+=1
if damaged:
print """
DEBUG: Partial hand triggered by a line containing 'Seat X:' twice. This is a
bug in the FTP software when a player sits down in the middle of a hand.
Adding a newline after the player name will fix the issue
"""
print "File: %s" %(file)
print "Line: %s" %(startpos)
else: #normal processing
isTourney=fpdb_simple.isTourney(hand[0])
if not isTourney:
fpdb_simple.filterAnteBlindFold(site,hand)
hand=fpdb_simple.filterCrap(site, hand, isTourney)
self.hand=hand
try:
handsId=fpdb_parse_logic.mainParser(self.settings['db-backend'], self.fdb.db
,self.fdb.cursor, site, category, hand)
self.fdb.db.commit()
stored+=1
if self.callHud:
#print "call to HUD here. handsId:",handsId
#pipe the Hands.id out to the HUD
self.caller.pipe_to_hud.stdin.write("%s" % (handsId) + os.linesep)
except fpdb_simple.DuplicateError:
duplicates+=1
except (ValueError), fe:
errors+=1
self.printEmailErrorMessage(errors, file, hand)
if (self.settings['failOnError']):
                            self.fdb.db.commit() #don't remove this, in case hand processing was cancelled.
raise
except (fpdb_simple.FpdbError), fe:
errors+=1
self.printEmailErrorMessage(errors, file, hand)
#fe.printStackTrace() #todo: get stacktrace
self.fdb.db.rollback()
if (self.settings['failOnError']):
                            self.fdb.db.commit() #don't remove this, in case hand processing was cancelled.
raise
if (self.settings['minPrint']!=0):
if ((stored+duplicates+partial+errors)%self.settings['minPrint']==0):
print "stored:", stored, "duplicates:", duplicates, "partial:", partial, "errors:", errors
if (self.settings['handCount']!=0):
if ((stored+duplicates+partial+errors)>=self.settings['handCount']):
if (not self.settings['quiet']):
print "quitting due to reaching the amount of hands to be imported"
print "Total stored:", stored, "duplicates:", duplicates, "partial/damaged:", partial, "errors:", errors, " time:", (time() - starttime)
sys.exit(0)
startpos=endpos
ttime = time() - starttime
print "\rTotal stored:", stored, "duplicates:", duplicates, "partial:", partial, "errors:", errors, " time:", ttime
if stored==0:
if duplicates>0:
for line_no in range(len(self.lines)):
if self.lines[line_no].find("Game #")!=-1:
final_game_line=self.lines[line_no]
handsId=fpdb_simple.parseSiteHandNo(final_game_line)
else:
print "failed to read a single hand from file:", inputFile
handsId=0
#todo: this will cause return of an unstored hand number if the last hand was error or partial
self.fdb.db.commit()
self.handsId=handsId
return (stored, duplicates, partial, errors, ttime)
def parseTourneyHistory(self):
print "Tourney history parser stub"
#Find tournament boundaries.
        #print self.faobs
def printEmailErrorMessage(self, errors, filename, line):
traceback.print_exc(file=sys.stderr)
print "Error No.",errors,", please send the hand causing this to steffen@sycamoretest.info so I can fix it."
print "Filename:", filename
print "Here is the first line so you can identify it. Please mention that the error was a ValueError:"
print self.hand[0]
print "Hand logged to hand-errors.txt"
logfile = open('hand-errors.txt', 'a')
for s in self.hand:
logfile.write(str(s) + "\n")
logfile.write("\n")
logfile.close()
if __name__ == "__main__":
print "CLI for fpdb_import is now available as CliFpdb.py"