53c796dddc
Had a report on the 2+2 thread that: File "C:\Documents and Settings\b\Desktop\fpdb\pyfpdb\GuiAutoImport.py", line 160, in do_import self.importer.runUpdated() File "C:\Documents and Settings\b\Desktop\fpdb\pyfpdb\fpdb_import.py", line 371, in runUpdated (stored, duplicates, partial, errors, ttime) = self.import_file_dict(self.database, file, self.filelist[file][0], self.filelist[file][1], None) File "C:\Documents and Settings\b\Desktop\fpdb\pyfpdb\fpdb_import.py", line 467, in import_file_dict print "fpdb_import: sending hand to hud", hand.dbid_hands, "pipe =", self.caller.pipe_to_hud IOError: [Errno 9] Bad file descriptor Was happening, which is a crash attempting to print self.caller.pipe_to_hud This patch doesn't fix the problem, but should give some indication in the log that it happened.
506 lines
22 KiB
Python
Executable File
506 lines
22 KiB
Python
Executable File
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#Copyright 2008-2010 Steffen Schaumburg
|
|
#This program is free software: you can redistribute it and/or modify
|
|
#it under the terms of the GNU Affero General Public License as published by
|
|
#the Free Software Foundation, version 3 of the License.
|
|
#
|
|
#This program is distributed in the hope that it will be useful,
|
|
#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
#GNU General Public License for more details.
|
|
#
|
|
#You should have received a copy of the GNU Affero General Public License
|
|
#along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#In the "official" distribution you can find the license in agpl-3.0.txt.
|
|
|
|
# Standard Library modules
|
|
|
|
import os # todo: remove this once import_dir is in fpdb_import
|
|
import sys
|
|
from time import time, strftime, sleep, clock
|
|
import traceback
|
|
import math
|
|
import datetime
|
|
import re
|
|
import Queue
|
|
from collections import deque # using Queue for now
|
|
import threading
|
|
|
|
import logging
|
|
# logging has been set up in fpdb.py or HUD_main.py, use their settings:
|
|
log = logging.getLogger("importer")
|
|
|
|
import pygtk
|
|
import gtk
|
|
|
|
# fpdb/FreePokerTools modules
|
|
|
|
import Database
|
|
import Configuration
|
|
import Exceptions
|
|
|
|
|
|
# database interface modules
|
|
try:
|
|
import MySQLdb
|
|
except ImportError:
|
|
log.debug("Import database module: MySQLdb not found")
|
|
else:
|
|
mysqlLibFound = True
|
|
|
|
try:
|
|
import psycopg2
|
|
except ImportError:
|
|
log.debug("Import database module: psycopg2 not found")
|
|
else:
|
|
import psycopg2.extensions
|
|
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
|
|
|
|
class Importer:
|
|
def __init__(self, caller, settings, config, sql = None):
|
|
"""Constructor"""
|
|
self.settings = settings
|
|
self.caller = caller
|
|
self.config = config
|
|
self.sql = sql
|
|
|
|
#log = Configuration.get_logger("logging.conf", "importer", log_dir=self.config.dir_log)
|
|
self.filelist = {}
|
|
self.dirlist = {}
|
|
self.siteIds = {}
|
|
self.addToDirList = {}
|
|
self.removeFromFileList = {} # to remove deleted files
|
|
self.monitor = False
|
|
self.updatedsize = {}
|
|
self.updatedtime = {}
|
|
self.lines = None
|
|
self.faobs = None # File as one big string
|
|
self.pos_in_file = {} # dict to remember how far we have read in the file
|
|
#Set defaults
|
|
self.callHud = self.config.get_import_parameters().get("callFpdbHud")
|
|
|
|
# CONFIGURATION OPTIONS
|
|
self.settings.setdefault("minPrint", 30)
|
|
self.settings.setdefault("handCount", 0)
|
|
#self.settings.setdefault("allowHudcacheRebuild", True) # NOT USED NOW
|
|
#self.settings.setdefault("forceThreads", 2) # NOT USED NOW
|
|
self.settings.setdefault("writeQSize", 1000) # no need to change
|
|
self.settings.setdefault("writeQMaxWait", 10) # not used
|
|
self.settings.setdefault("dropIndexes", "don't drop")
|
|
self.settings.setdefault("dropHudCache", "don't drop")
|
|
self.settings.setdefault("starsArchive", False)
|
|
|
|
self.writeq = None
|
|
self.database = Database.Database(self.config, sql = self.sql)
|
|
self.writerdbs = []
|
|
self.settings.setdefault("threads", 1) # value set by GuiBulkImport
|
|
for i in xrange(self.settings['threads']):
|
|
self.writerdbs.append( Database.Database(self.config, sql = self.sql) )
|
|
|
|
clock() # init clock in windows
|
|
|
|
#Set functions
|
|
def setCallHud(self, value):
|
|
self.callHud = value
|
|
|
|
def setMinPrint(self, value):
|
|
self.settings['minPrint'] = int(value)
|
|
|
|
def setHandCount(self, value):
|
|
self.settings['handCount'] = int(value)
|
|
|
|
def setQuiet(self, value):
|
|
self.settings['quiet'] = value
|
|
|
|
def setFailOnError(self, value):
|
|
self.settings['failOnError'] = value
|
|
|
|
def setHandsInDB(self, value):
|
|
self.settings['handsInDB'] = value
|
|
|
|
def setThreads(self, value):
|
|
self.settings['threads'] = value
|
|
if self.settings["threads"] > len(self.writerdbs):
|
|
for i in xrange(self.settings['threads'] - len(self.writerdbs)):
|
|
self.writerdbs.append( Database.Database(self.config, sql = self.sql) )
|
|
|
|
def setDropIndexes(self, value):
|
|
self.settings['dropIndexes'] = value
|
|
|
|
def setDropHudCache(self, value):
|
|
self.settings['dropHudCache'] = value
|
|
|
|
def setStarsArchive(self, value):
|
|
self.settings['starsArchive'] = value
|
|
|
|
# def setWatchTime(self):
|
|
# self.updated = time()
|
|
|
|
def clearFileList(self):
|
|
self.updatedsize = {}
|
|
self.updatetime = {}
|
|
self.pos_in_file = {}
|
|
self.filelist = {}
|
|
|
|
def closeDBs(self):
|
|
self.database.disconnect()
|
|
for i in xrange(len(self.writerdbs)):
|
|
self.writerdbs[i].disconnect()
|
|
|
|
#Add an individual file to filelist
|
|
def addImportFile(self, filename, site = "default", filter = "passthrough"):
|
|
#TODO: test it is a valid file -> put that in config!!
|
|
#print "addimportfile: filename is a", filename.__class__
|
|
# filename now comes in as unicode
|
|
if filename in self.filelist or not os.path.exists(filename):
|
|
return
|
|
self.filelist[filename] = [site] + [filter]
|
|
if site not in self.siteIds:
|
|
# Get id from Sites table in DB
|
|
result = self.database.get_site_id(site)
|
|
if len(result) == 1:
|
|
self.siteIds[site] = result[0][0]
|
|
else:
|
|
if len(result) == 0:
|
|
log.error("Database ID for %s not found" % site)
|
|
else:
|
|
log.error("[ERROR] More than 1 Database ID found for %s - Multiple currencies not implemented yet" % site)
|
|
|
|
|
|
# Called from GuiBulkImport to add a file or directory.
|
|
def addBulkImportImportFileOrDir(self, inputPath, site = "PokerStars"):
|
|
"""Add a file or directory for bulk import"""
|
|
filter = self.config.hhcs[site].converter
|
|
# Bulk import never monitors
|
|
# if directory, add all files in it. Otherwise add single file.
|
|
# TODO: only add sane files?
|
|
if os.path.isdir(inputPath):
|
|
for subdir in os.walk(inputPath):
|
|
for file in subdir[2]:
|
|
self.addImportFile(unicode(os.path.join(subdir[0], file),'utf-8'),
|
|
site=site, filter=filter)
|
|
else:
|
|
|
|
self.addImportFile(unicode(inputPath,'utf-8'), site=site, filter=filter)
|
|
#Add a directory of files to filelist
|
|
#Only one import directory per site supported.
|
|
#dirlist is a hash of lists:
|
|
#dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
|
|
def addImportDirectory(self,dir,monitor=False, site="default", filter="passthrough"):
|
|
#gets called by GuiAutoImport.
|
|
#This should really be using os.walk
|
|
#http://docs.python.org/library/os.html
|
|
if os.path.isdir(dir):
|
|
if monitor == True:
|
|
self.monitor = True
|
|
self.dirlist[site] = [dir] + [filter]
|
|
|
|
#print "addImportDirectory: checking files in", dir
|
|
for file in os.listdir(dir):
|
|
#print " adding file ", file
|
|
self.addImportFile(os.path.join(dir, file), site, filter)
|
|
else:
|
|
log.warning("Attempted to add non-directory: '%s' as an import directory" % str(dir))
|
|
|
|
def runImport(self):
|
|
""""Run full import on self.filelist. This is called from GuiBulkImport.py"""
|
|
#if self.settings['forceThreads'] > 0: # use forceThreads until threading enabled in GuiBulkImport
|
|
# self.setThreads(self.settings['forceThreads'])
|
|
|
|
# Initial setup
|
|
start = datetime.datetime.now()
|
|
starttime = time()
|
|
log.info("Started at %s -- %d files to import. indexes: %s" % (start, len(self.filelist), self.settings['dropIndexes']))
|
|
if self.settings['dropIndexes'] == 'auto':
|
|
self.settings['dropIndexes'] = self.calculate_auto2(self.database, 12.0, 500.0)
|
|
if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'auto':
|
|
self.settings['dropHudCache'] = self.calculate_auto2(self.database, 25.0, 500.0) # returns "drop"/"don't drop"
|
|
|
|
if self.settings['dropIndexes'] == 'drop':
|
|
self.database.prepareBulkImport()
|
|
else:
|
|
log.debug("No need to drop indexes.")
|
|
#print "dropInd =", self.settings['dropIndexes'], " dropHudCache =", self.settings['dropHudCache']
|
|
|
|
if self.settings['threads'] <= 0:
|
|
(totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, None)
|
|
else:
|
|
# create queue (will probably change to deque at some point):
|
|
self.writeq = Queue.Queue( self.settings['writeQSize'] )
|
|
# start separate thread(s) to read hands from queue and write to db:
|
|
for i in xrange(self.settings['threads']):
|
|
t = threading.Thread( target=self.writerdbs[i].insert_queue_hands
|
|
, args=(self.writeq, self.settings["writeQMaxWait"])
|
|
, name="dbwriter-"+str(i) )
|
|
t.setDaemon(True)
|
|
t.start()
|
|
# read hands and write to q:
|
|
(totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, self.writeq)
|
|
|
|
if self.writeq.empty():
|
|
print "writers finished already"
|
|
pass
|
|
else:
|
|
print "waiting for writers to finish ..."
|
|
#for t in threading.enumerate():
|
|
# print " "+str(t)
|
|
#self.writeq.join()
|
|
#using empty() might be more reliable:
|
|
while not self.writeq.empty() and len(threading.enumerate()) > 1:
|
|
# TODO: Do we need to actually tell the progress indicator to move, or is it already moving, and we just need to process events...
|
|
while gtk.events_pending(): # see http://faq.pygtk.org/index.py?req=index for more hints (3.7)
|
|
gtk.main_iteration(False)
|
|
sleep(0.5)
|
|
print " ... writers finished"
|
|
|
|
# Tidying up after import
|
|
if self.settings['dropIndexes'] == 'drop':
|
|
self.database.afterBulkImport()
|
|
else:
|
|
print "No need to rebuild indexes."
|
|
if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'drop':
|
|
self.database.rebuild_hudcache()
|
|
else:
|
|
print "No need to rebuild hudcache."
|
|
self.database.analyzeDB()
|
|
endtime = time()
|
|
return (totstored, totdups, totpartial, toterrors, endtime-starttime)
|
|
# end def runImport
|
|
|
|
def importFiles(self, db, q):
|
|
""""Read filenames in self.filelist and pass to import_file_dict().
|
|
Uses a separate database connection if created as a thread (caller
|
|
passes None or no param as db)."""
|
|
|
|
totstored = 0
|
|
totdups = 0
|
|
totpartial = 0
|
|
toterrors = 0
|
|
tottime = 0
|
|
for file in self.filelist:
|
|
(stored, duplicates, partial, errors, ttime) = self.import_file_dict(db, file
|
|
,self.filelist[file][0], self.filelist[file][1], q)
|
|
totstored += stored
|
|
totdups += duplicates
|
|
totpartial += partial
|
|
toterrors += errors
|
|
|
|
for i in xrange( self.settings['threads'] ):
|
|
print "sending finish msg qlen =", q.qsize()
|
|
db.send_finish_msg(q)
|
|
|
|
return (totstored, totdups, totpartial, toterrors)
|
|
# end def importFiles
|
|
|
|
# not used currently
|
|
def calculate_auto(self, db):
|
|
"""An heuristic to determine a reasonable value of drop/don't drop"""
|
|
if len(self.filelist) == 1: return "don't drop"
|
|
if 'handsInDB' not in self.settings:
|
|
try:
|
|
tmpcursor = db.get_cursor()
|
|
tmpcursor.execute("Select count(1) from Hands;")
|
|
self.settings['handsInDB'] = tmpcursor.fetchone()[0]
|
|
except:
|
|
pass # if this fails we're probably doomed anyway
|
|
if self.settings['handsInDB'] < 5000: return "drop"
|
|
if len(self.filelist) < 50: return "don't drop"
|
|
if self.settings['handsInDB'] > 50000: return "don't drop"
|
|
return "drop"
|
|
|
|
def calculate_auto2(self, db, scale, increment):
|
|
"""A second heuristic to determine a reasonable value of drop/don't drop
|
|
This one adds up size of files to import to guess number of hands in them
|
|
Example values of scale and increment params might be 10 and 500 meaning
|
|
roughly: drop if importing more than 10% (100/scale) of hands in db or if
|
|
less than 500 hands in db"""
|
|
size_per_hand = 1300.0 # wag based on a PS 6-up FLHE file. Actual value not hugely important
|
|
# as values of scale and increment compensate for it anyway.
|
|
# decimal used to force float arithmetic
|
|
|
|
# get number of hands in db
|
|
if 'handsInDB' not in self.settings:
|
|
try:
|
|
tmpcursor = db.get_cursor()
|
|
tmpcursor.execute("Select count(1) from Hands;")
|
|
self.settings['handsInDB'] = tmpcursor.fetchone()[0]
|
|
except:
|
|
pass # if this fails we're probably doomed anyway
|
|
|
|
# add up size of import files
|
|
total_size = 0.0
|
|
for file in self.filelist:
|
|
if os.path.exists(file):
|
|
stat_info = os.stat(file)
|
|
total_size += stat_info.st_size
|
|
|
|
# if hands_in_db is zero or very low, we want to drop indexes, otherwise compare
|
|
# import size with db size somehow:
|
|
ret = "don't drop"
|
|
if self.settings['handsInDB'] < scale * (total_size/size_per_hand) + increment:
|
|
ret = "drop"
|
|
#print "auto2: handsindb =", self.settings['handsInDB'], "total_size =", total_size, "size_per_hand =", \
|
|
# size_per_hand, "inc =", increment, "return:", ret
|
|
return ret
|
|
|
|
#Run import on updated files, then store latest update time. Called from GuiAutoImport.py
|
|
def runUpdated(self):
|
|
#Check for new files in monitored directories
|
|
#todo: make efficient - always checks for new file, should be able to use mtime of directory
|
|
# ^^ May not work on windows
|
|
|
|
#rulog = open('runUpdated.txt', 'a')
|
|
#rulog.writelines("runUpdated ... ")
|
|
for site in self.dirlist:
|
|
self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])
|
|
|
|
for file in self.filelist:
|
|
if os.path.exists(file):
|
|
stat_info = os.stat(file)
|
|
#rulog.writelines("path exists ")
|
|
if file in self.updatedsize: # we should be able to assume that if we're in size, we're in time as well
|
|
if stat_info.st_size > self.updatedsize[file] or stat_info.st_mtime > self.updatedtime[file]:
|
|
# print "file",counter," updated", os.path.basename(file), stat_info.st_size, self.updatedsize[file], stat_info.st_mtime, self.updatedtime[file]
|
|
try:
|
|
if not os.path.isdir(file):
|
|
self.caller.addText("\n"+os.path.basename(file))
|
|
except KeyError: # TODO: What error happens here?
|
|
pass
|
|
(stored, duplicates, partial, errors, ttime) = self.import_file_dict(self.database, file, self.filelist[file][0], self.filelist[file][1], None)
|
|
try:
|
|
if not os.path.isdir(file): # Note: This assumes that whatever calls us has an "addText" func
|
|
self.caller.addText(" %d stored, %d duplicates, %d partial, %d errors (time = %f)" % (stored, duplicates, partial, errors, ttime))
|
|
except KeyError: # TODO: Again, what error happens here? fix when we find out ..
|
|
pass
|
|
self.updatedsize[file] = stat_info.st_size
|
|
self.updatedtime[file] = time()
|
|
else:
|
|
if os.path.isdir(file) or (time() - stat_info.st_mtime) < 60:
|
|
self.updatedsize[file] = 0
|
|
self.updatedtime[file] = 0
|
|
else:
|
|
self.updatedsize[file] = stat_info.st_size
|
|
self.updatedtime[file] = time()
|
|
else:
|
|
self.removeFromFileList[file] = True
|
|
|
|
self.addToDirList = filter(lambda x: self.addImportDirectory(x, True, self.addToDirList[x][0], self.addToDirList[x][1]), self.addToDirList)
|
|
|
|
for file in self.removeFromFileList:
|
|
if file in self.filelist:
|
|
del self.filelist[file]
|
|
|
|
self.addToDirList = {}
|
|
self.removeFromFileList = {}
|
|
self.database.rollback()
|
|
#rulog.writelines(" finished\n")
|
|
#rulog.close()
|
|
|
|
# This is now an internal function that should not be called directly.
|
|
def import_file_dict(self, db, file, site, filter, q=None):
|
|
#print "import_file_dict"
|
|
|
|
if os.path.isdir(file):
|
|
self.addToDirList[file] = [site] + [filter]
|
|
return (0,0,0,0,0)
|
|
|
|
conv = None
|
|
(stored, duplicates, partial, errors, ttime) = (0, 0, 0, 0, time())
|
|
|
|
# sc: is there any need to decode this? maybe easier to skip it than guess at the encoding?
|
|
#file = file.decode("utf-8") #(Configuration.LOCALE_ENCODING)
|
|
|
|
# Load filter, process file, pass returned filename to import_fpdb_file
|
|
if self.settings['threads'] > 0 and self.writeq is not None:
|
|
log.info("Converting " + file + " (" + str(q.qsize()) + ")")
|
|
else:
|
|
log.info("Converting " + file)
|
|
hhbase = self.config.get_import_parameters().get("hhArchiveBase")
|
|
hhbase = os.path.expanduser(hhbase)
|
|
hhdir = os.path.join(hhbase,site)
|
|
try:
|
|
out_path = os.path.join(hhdir, file.split(os.path.sep)[-2]+"-"+os.path.basename(file))
|
|
except:
|
|
out_path = os.path.join(hhdir, "x"+strftime("%d-%m-%y")+os.path.basename(file))
|
|
|
|
filter_name = filter.replace("ToFpdb", "")
|
|
|
|
mod = __import__(filter)
|
|
obj = getattr(mod, filter_name, None)
|
|
if callable(obj):
|
|
idx = 0
|
|
if file in self.pos_in_file:
|
|
idx = self.pos_in_file[file]
|
|
else:
|
|
self.pos_in_file[file] = 0
|
|
hhc = obj(self.config, in_path = file, out_path = out_path, index = idx, starsArchive = self.settings['starsArchive'])
|
|
if hhc.getStatus():
|
|
handlist = hhc.getProcessedHands()
|
|
self.pos_in_file[file] = hhc.getLastCharacterRead()
|
|
to_hud = []
|
|
|
|
for hand in handlist:
|
|
if hand is not None:
|
|
hand.prepInsert(self.database)
|
|
try:
|
|
hand.insert(self.database)
|
|
except Exceptions.FpdbHandDuplicate:
|
|
duplicates += 1
|
|
else:
|
|
if self.callHud and hand.dbid_hands != 0:
|
|
to_hud.append(hand.dbid_hands)
|
|
else: # TODO: Treat empty as an error, or just ignore?
|
|
log.error("Hand processed but empty")
|
|
|
|
# Call hudcache update if not in bulk import mode
|
|
# FIXME: Need to test for bulk import that isn't rebuilding the cache
|
|
if self.callHud:
|
|
for hand in handlist:
|
|
if hand is not None and not hand.is_duplicate:
|
|
hand.updateHudCache(self.database)
|
|
self.database.commit()
|
|
|
|
#pipe the Hands.id out to the HUD
|
|
for hid in to_hud:
|
|
try:
|
|
print "fpdb_import: sending hand to hud", hand.dbid_hands, "pipe =", self.caller.pipe_to_hud
|
|
self.caller.pipe_to_hud.stdin.write("%s" % (hid) + os.linesep)
|
|
except IOError, e:
|
|
log.error("Failed to send hand to HUD: %s" % e)
|
|
|
|
errors = getattr(hhc, 'numErrors')
|
|
stored = getattr(hhc, 'numHands')
|
|
stored -= duplicates
|
|
stored -= errors
|
|
else:
|
|
# conversion didn't work
|
|
# TODO: appropriate response?
|
|
return (0, 0, 0, 1, time() - ttime)
|
|
else:
|
|
log.warning("Unknown filter filter_name:'%s' in filter:'%s'" %(filter_name, filter))
|
|
return (0, 0, 0, 1, time() - ttime)
|
|
|
|
ttime = time() - ttime
|
|
|
|
#This will barf if conv.getStatus != True
|
|
return (stored, duplicates, partial, errors, ttime)
|
|
|
|
|
|
def printEmailErrorMessage(self, errors, filename, line):
|
|
traceback.print_exc(file=sys.stderr)
|
|
print "Error No.",errors,", please send the hand causing this to fpdb-main@lists.sourceforge.net so we can fix the problem."
|
|
print "Filename:", filename
|
|
print "Here is the first line of the hand so you can identify it. Please mention that the error was a ValueError:"
|
|
print self.hand[0]
|
|
print "Hand logged to hand-errors.txt"
|
|
logfile = open('hand-errors.txt', 'a')
|
|
for s in self.hand:
|
|
logfile.write(str(s) + "\n")
|
|
logfile.write("\n")
|
|
logfile.close()
|
|
|
|
if __name__ == "__main__":
|
|
print "CLI for fpdb_import is now available as CliFpdb.py"
|