2008-08-04 05:44:28 +02:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
#Copyright 2008 Steffen Jobbagy-Felso
|
|
|
|
#This program is free software: you can redistribute it and/or modify
|
|
|
|
#it under the terms of the GNU Affero General Public License as published by
|
|
|
|
#the Free Software Foundation, version 3 of the License.
|
|
|
|
#
|
|
|
|
#This program is distributed in the hope that it will be useful,
|
|
|
|
#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
#GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
#You should have received a copy of the GNU Affero General Public License
|
|
|
|
#along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#In the "official" distribution you can find the license in
|
|
|
|
#agpl-3.0.txt in the docs folder of the package.
|
|
|
|
|
|
|
|
#see status.txt for site/games support info
|
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# Standard Library modules
|
|
|
|
|
|
|
|
import os # todo: remove this once import_dir is in fpdb_import
|
2008-08-04 05:44:28 +02:00
|
|
|
import sys
|
2009-07-31 22:24:21 +02:00
|
|
|
from time import time, strftime, sleep
|
2009-02-07 16:06:48 +01:00
|
|
|
import traceback
|
|
|
|
import math
|
|
|
|
import datetime
|
|
|
|
import re
|
2009-07-31 22:24:21 +02:00
|
|
|
import Queue
|
|
|
|
from collections import deque # using Queue for now
|
|
|
|
import threading
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2009-09-10 05:10:55 +02:00
|
|
|
import pygtk
|
|
|
|
import gtk
|
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# fpdb/FreePokerTools modules
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
import fpdb_simple
|
|
|
|
import fpdb_db
|
2009-06-26 00:14:32 +02:00
|
|
|
import Database
|
2009-02-07 16:06:48 +01:00
|
|
|
import fpdb_parse_logic
|
|
|
|
import Configuration
|
2009-09-11 07:12:46 +02:00
|
|
|
import Exceptions
|
2009-02-07 16:06:48 +01:00
|
|
|
|
2009-11-10 01:30:23 +01:00
|
|
|
# Module-wide logger named "importer", obtained through Configuration.get_logger
# using the "logging.conf" file name.
log = Configuration.get_logger("logging.conf", "importer")
|
2009-08-12 02:46:39 +02:00
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# database interface modules
|
2008-09-16 23:19:50 +02:00
|
|
|
# Optional MySQL driver: a missing module is only logged, it is not an error.
try:
    import MySQLdb
except ImportError:
    log.debug("Import database module: MySQLdb not found")
else:
    # NOTE: this name only exists when the import above succeeded.
    mysqlLibFound = True
|
2009-11-25 14:22:14 +01:00
|
|
|
|
2008-09-16 23:19:50 +02:00
|
|
|
# Optional PostgreSQL driver: a missing module is only logged, it is not an error.
try:
    import psycopg2
except ImportError:
    log.debug("Import database module: psycopg2 not found")
else:
    import psycopg2.extensions
    # Register psycopg2's UNICODE typecaster (no connection argument is given).
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2008-10-08 19:36:08 +02:00
|
|
|
class Importer:
|
2009-07-29 07:37:06 +02:00
|
|
|
def __init__(self, caller, settings, config, sql = None):
    """Store the collaborators, reset the bookkeeping and open the databases.

    caller   -- object that created the importer; kept as self.caller
    settings -- dict-like import options; keys missing from it are filled
                in with the defaults below (the passed object is modified)
    config   -- configuration object; asked for the "callFpdbHud" import
                parameter and handed to every Database.Database created
    sql      -- optional object forwarded to Database.Database as ``sql``
    """
    self.settings = settings
    self.caller = caller
    self.config = config
    self.sql = sql

    # Bookkeeping of what has to be imported and how far we got.
    self.filelist = {}
    self.dirlist = {}
    self.siteIds = {}
    self.addToDirList = {}
    self.removeFromFileList = {}    # to remove deleted files
    self.monitor = False
    self.updatedsize = {}
    self.updatedtime = {}
    self.lines = None
    self.faobs = None               # File as one big string
    self.pos_in_file = {}           # how far we have read in each file

    self.callHud = self.config.get_import_parameters().get("callFpdbHud")

    # CONFIGURATION OPTIONS: only applied where the caller gave no value.
    option_defaults = (
        ("minPrint", 30),
        ("handCount", 0),
        ("writeQSize", 1000),           # no need to change
        ("writeQMaxWait", 10),          # not used
        ("dropIndexes", "don't drop"),
        ("dropHudCache", "don't drop"),
    )
    for option, default in option_defaults:
        self.settings.setdefault(option, default)

    self.writeq = None
    self.database = Database.Database(self.config, sql = self.sql)

    # One extra database object per writer thread.
    self.writerdbs = []
    self.settings.setdefault("threads", 1)  # value set by GuiBulkImport
    for _writer_no in xrange(self.settings['threads']):
        self.writerdbs.append(Database.Database(self.config, sql = self.sql))

    self.NEWIMPORT = Configuration.NEWIMPORT
|
2009-07-17 11:16:06 +02:00
|
|
|
|
2008-12-06 20:50:40 +01:00
|
|
|
#Set functions
|
|
|
|
def setCallHud(self, value):
    """Remember in self.callHud whether imported hand ids are sent to the HUD."""
    self.callHud = value
    return None
|
|
|
|
|
|
|
|
def setMinPrint(self, value):
    """Store int(value) as the 'minPrint' setting."""
    interval = int(value)
    self.settings['minPrint'] = interval
|
|
|
|
|
|
|
|
def setHandCount(self, value):
    """Store int(value) as the 'handCount' setting."""
    limit = int(value)
    self.settings['handCount'] = limit
|
|
|
|
|
|
|
|
def setQuiet(self, value):
    """Store value unchanged as the 'quiet' setting."""
    key = 'quiet'
    self.settings[key] = value
|
|
|
|
|
|
|
|
def setFailOnError(self, value):
    """Store value unchanged as the 'failOnError' setting."""
    key = 'failOnError'
    self.settings[key] = value
|
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
def setHandsInDB(self, value):
    """Store value unchanged as the 'handsInDB' setting."""
    key = 'handsInDB'
    self.settings[key] = value
|
|
|
|
|
|
|
|
def setThreads(self, value):
    """Store the 'threads' setting and make sure enough writer databases exist.

    When the new value exceeds len(self.writerdbs), additional
    Database.Database objects are appended until the counts match.
    Existing entries are never removed.
    """
    self.settings['threads'] = value
    wanted = self.settings["threads"]
    available = len(self.writerdbs)
    if wanted > available:
        for _extra in xrange(wanted - available):
            self.writerdbs.append(Database.Database(self.config, sql = self.sql))
|
2009-02-07 16:06:48 +01:00
|
|
|
|
|
|
|
def setDropIndexes(self, value):
    """Store value unchanged as the 'dropIndexes' setting."""
    key = 'dropIndexes'
    self.settings[key] = value
|
|
|
|
|
2009-08-04 22:41:04 +02:00
|
|
|
def setDropHudCache(self, value):
    """Store value unchanged as the 'dropHudCache' setting."""
    key = 'dropHudCache'
    self.settings[key] = value
|
|
|
|
|
2008-12-06 20:50:40 +01:00
|
|
|
# def setWatchTime(self):
|
|
|
|
# self.updated = time()
|
|
|
|
|
|
|
|
def clearFileList(self):
    """Forget every file queued for import and all per-file progress data.

    Resets the recorded sizes, modification times and read positions
    together with the file list itself.
    """
    self.updatedsize = {}
    # Fixed: this used to assign to the misspelled "updatetime", so the real
    # updatedtime dict (the one runUpdated reads) was never cleared.
    self.updatedtime = {}
    self.pos_in_file = {}
    self.filelist = {}
|
|
|
|
|
2009-07-31 22:24:21 +02:00
|
|
|
def closeDBs(self):
    """Disconnect the main database, then every writer database in order."""
    self.database.disconnect()
    for writer in self.writerdbs:
        writer.disconnect()
|
|
|
|
|
2008-12-06 20:50:40 +01:00
|
|
|
#Add an individual file to filelist
|
|
|
|
def addImportFile(self, filename, site = "default", filter = "passthrough"):
    """Queue one file for import and make sure the id of its site is cached.

    Nothing happens when the file is already queued or does not exist.
    Otherwise filelist[filename] becomes [site, filter]; if the site has no
    cached id yet, the database is asked for it. Zero or several matching
    ids are logged as errors and the file stays queued without a cached id.
    """
    #TODO: test it is a valid file -> put that in config!!
    if filename in self.filelist:
        return
    if not os.path.exists(filename):
        return

    self.filelist[filename] = [site, filter]

    if site in self.siteIds:
        return

    # Get id from Sites table in DB
    rows = self.database.get_site_id(site)
    found = len(rows)
    if found == 1:
        self.siteIds[site] = rows[0][0]
    elif found == 0:
        log.error("Database ID for %s not found" % site)
    else:
        log.error("[ERROR] More than 1 Database ID found for %s - Multiple currencies not implemented yet" % site)
|
2009-03-21 13:23:51 +01:00
|
|
|
|
2008-12-06 20:50:40 +01:00
|
|
|
|
2009-02-25 15:59:11 +01:00
|
|
|
# Called from GuiBulkImport to add a file or directory.
|
2009-03-24 14:58:45 +01:00
|
|
|
def addBulkImportImportFileOrDir(self, inputPath, site = "PokerStars"):
    """Add a file or directory for bulk import

    The converter name is taken from self.config.hhcs[site].converter.
    A directory is walked recursively and every file below it is passed to
    addImportFile; any other path is passed to addImportFile as is.
    """
    converter = self.config.hhcs[site].converter
    # Bulk import never monitors
    # TODO: only add sane files?
    if not os.path.isdir(inputPath):
        self.addImportFile(inputPath, site=site, filter=converter)
        return

    for root, _subdirs, names in os.walk(inputPath):
        for name in names:
            self.addImportFile(os.path.join(root, name), site=site,
                               filter=converter)
|
2008-12-06 20:50:40 +01:00
|
|
|
#Add a directory of files to filelist
|
|
|
|
#Only one import directory per site supported.
|
|
|
|
#dirlist is a hash of lists:
|
|
|
|
#dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
|
2009-11-03 21:29:05 +01:00
|
|
|
def addImportDirectory(self,dir,monitor=False, site="default", filter="passthrough"):
    """Queue every entry of a directory for import (not recursive).

    Gets called by GuiAutoImport. Only one import directory per site is
    supported: with monitor set, dirlist[site] is overwritten with
    [dir, filter] and self.monitor is switched on. A path that is not a
    directory is only logged as a warning.
    """
    #This should really be using os.walk
    #http://docs.python.org/library/os.html
    if not os.path.isdir(dir):
        log.warning("Attempted to add non-directory: '%s' as an import directory" % str(dir))
        return

    if monitor == True:
        self.monitor = True
        self.dirlist[site] = [dir, filter]

    for entry in os.listdir(dir):
        self.addImportFile(os.path.join(dir, entry), site, filter)
|
2008-12-06 20:50:40 +01:00
|
|
|
|
|
|
|
def runImport(self):
    """Run full import on self.filelist. This is called from GuiBulkImport.py

    Steps: resolve the 'auto' values of the dropIndexes/dropHudCache
    settings, optionally prepare the database for bulk import, import all
    files (directly, or through a queue drained by writer threads when the
    'threads' setting is positive), then tidy up the database.

    Returns (stored, duplicates, partial, errors, elapsed_seconds).
    """
    #if self.settings['forceThreads'] > 0:  # use forceThreads until threading enabled in GuiBulkImport
    #    self.setThreads(self.settings['forceThreads'])

    # Initial setup
    start = datetime.datetime.now()
    starttime = time()
    log.info("Started at %s -- %d files to import. indexes: %s" % (start, len(self.filelist), self.settings['dropIndexes']))
    if self.settings['dropIndexes'] == 'auto':
        self.settings['dropIndexes'] = self.calculate_auto2(self.database, 12.0, 500.0)
    if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'auto':
        self.settings['dropHudCache'] = self.calculate_auto2(self.database, 25.0, 500.0) # returns "drop"/"don't drop"

    if self.settings['dropIndexes'] == 'drop':
        self.database.prepareBulkImport()
    else:
        log.debug("No need to drop indexes.")
    #print "dropInd =", self.settings['dropIndexes'], " dropHudCache =", self.settings['dropHudCache']

    if self.settings['threads'] <= 0:
        # No writer threads: import without a queue.
        (totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, None)
    else:
        # create queue (will probably change to deque at some point):
        self.writeq = Queue.Queue( self.settings['writeQSize'] )
        # start separate thread(s) to read hands from queue and write to db:
        # NOTE(review): indexes self.writerdbs by thread number, so this
        # relies on at least 'threads' writer databases existing - confirm.
        for i in xrange(self.settings['threads']):
            t = threading.Thread( target=self.writerdbs[i].insert_queue_hands
                                , args=(self.writeq, self.settings["writeQMaxWait"])
                                , name="dbwriter-"+str(i) )
            t.setDaemon(True)
            t.start()
        # read hands and write to q:
        (totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, self.writeq)

        if self.writeq.empty():
            print "writers finished already"
            pass
        else:
            print "waiting for writers to finish ..."
            #for t in threading.enumerate():
            #    print " "+str(t)
            #self.writeq.join()
            #using empty() might be more reliable:
            # Poll until the queue is drained or only one thread is left.
            while not self.writeq.empty() and len(threading.enumerate()) > 1:
                # TODO: Do we need to actually tell the progress indicator to move, or is it already moving, and we just need to process events...
                # Keep the GTK user interface responsive while waiting.
                while gtk.events_pending(): # see http://faq.pygtk.org/index.py?req=index for more hints (3.7)
                    gtk.main_iteration(False)
                sleep(0.5)
            print " ... writers finished"

    # Tidying up after import
    if self.settings['dropIndexes'] == 'drop':
        self.database.afterBulkImport()
    else:
        print "No need to rebuild indexes."
    if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'drop':
        self.database.rebuild_hudcache()
    else:
        print "No need to rebuild hudcache."
    self.database.analyzeDB()
    endtime = time()
    return (totstored, totdups, totpartial, toterrors, endtime-starttime)
# end def runImport
|
|
|
|
|
|
|
|
def importFiles(self, db, q):
    """Pass every file in self.filelist to import_file_dict() and add up the results.

    db -- database object handed on to import_file_dict(); also used to
          send the finish messages
    q  -- queue handed on to import_file_dict(), or None when no writer
          threads are used

    After all files have been read, one finish message per configured
    thread is put on the queue (skipped when there is no queue).

    Returns (stored, duplicates, partial, errors) summed over all files.
    """
    totstored = 0
    totdups = 0
    totpartial = 0
    toterrors = 0

    for file in self.filelist:
        site, converter = self.filelist[file][0], self.filelist[file][1]
        result = self.import_file_dict(db, file, site, converter, q)
        if result is None:
            # import_file_dict() hands back nothing for directory entries;
            # unpacking that used to abort the whole import with a TypeError.
            continue
        (stored, duplicates, partial, errors, ttime) = result
        totstored += stored
        totdups += duplicates
        totpartial += partial
        toterrors += errors

    if q is not None:
        for i in range(self.settings['threads']):
            print("sending finish msg qlen = %s" % (q.qsize(),))
            db.send_finish_msg(q)

    return (totstored, totdups, totpartial, toterrors)
# end def importFiles
|
|
|
|
|
|
|
|
# not used currently
|
|
|
|
def calculate_auto(self, db):
    """An heuristic to determine a reasonable value of drop/don't drop

    Not used currently. Looks at the number of queued files and the number
    of hands in the database; the latter is read from settings['handsInDB']
    or, if that is missing, counted through ``db`` and cached there.

    Returns the string "drop" or "don't drop".
    """
    if len(self.filelist) == 1:
        return "don't drop"

    if 'handsInDB' not in self.settings:
        try:
            tmpcursor = db.get_cursor()
            tmpcursor.execute("Select count(1) from Hands;")
            self.settings['handsInDB'] = tmpcursor.fetchone()[0]
        except Exception:
            # The failure used to be swallowed and then surfaced as a
            # KeyError below. Without a hand count leave the indexes alone.
            log.warning("Could not count the hands in the database, not dropping")
            return "don't drop"

    hands_in_db = self.settings['handsInDB']
    if hands_in_db < 5000:
        return "drop"
    if len(self.filelist) < 50:
        return "don't drop"
    if hands_in_db > 50000:
        return "don't drop"
    return "drop"
|
2008-12-06 20:50:40 +01:00
|
|
|
|
2009-07-31 22:24:21 +02:00
|
|
|
def calculate_auto2(self, db, scale, increment):
    """A second heuristic to determine a reasonable value of drop/don't drop

    This one adds up size of files to import to guess number of hands in them
    Example values of scale and increment params might be 10 and 500 meaning
    roughly: drop if importing more than 10% (100/scale) of hands in db or if
    less than 500 hands in db

    db        -- database object used to count the hands when
                 settings['handsInDB'] is not set yet (the count is cached)
    scale     -- multiplier applied to the estimated number of new hands
    increment -- number of hands added to the threshold

    Returns the string "drop" or "don't drop".
    """
    # wag based on a PS 6-up FLHE file. Actual value not hugely important
    # as values of scale and increment compensate for it anyway.
    # decimal used to force float arithmetic
    size_per_hand = 1300.0

    # get number of hands in db
    if 'handsInDB' not in self.settings:
        try:
            tmpcursor = db.get_cursor()
            tmpcursor.execute("Select count(1) from Hands;")
            self.settings['handsInDB'] = tmpcursor.fetchone()[0]
        except Exception:
            # The failure used to be swallowed and then surfaced as a
            # KeyError below. Without a hand count leave things alone.
            log.warning("Could not count the hands in the database, not dropping")
            return "don't drop"

    # add up size of import files (files that vanished are ignored)
    total_size = float(sum(os.stat(name).st_size
                           for name in self.filelist
                           if os.path.exists(name)))

    # if hands_in_db is zero or very low, we want to drop indexes, otherwise compare
    # import size with db size somehow:
    threshold = scale * (total_size / size_per_hand) + increment
    if self.settings['handsInDB'] < threshold:
        return "drop"
    return "don't drop"
|
2009-06-09 23:22:10 +02:00
|
|
|
|
2009-07-31 22:24:21 +02:00
|
|
|
#Run import on updated files, then store latest update time. Called from GuiAutoImport.py
|
2008-12-06 20:50:40 +01:00
|
|
|
def runUpdated(self):
    """Import the files that changed since the last call. Called from GuiAutoImport.py

    Re-scans every monitored directory for new files, imports each known
    file whose size or modification time increased, records the new
    size/time, drops files that no longer exist from the file list and
    finally rolls back the main database connection.
    """
    #Check for new files in monitored directories
    #todo: make efficient - always checks for new file, should be able to use mtime of directory
    # ^^ May not work on windows
    for site in self.dirlist:
        self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])

    for file in self.filelist:
        if not os.path.exists(file):
            self.removeFromFileList[file] = True
            continue

        stat_info = os.stat(file)
        if file in self.updatedsize: # we should be able to assume that if we're in size, we're in time as well
            if stat_info.st_size > self.updatedsize[file] or stat_info.st_mtime > self.updatedtime[file]:
                try:
                    if not os.path.isdir(file):
                        self.caller.addText("\n"+file)
                except KeyError: # TODO: What error happens here?
                    pass
                self.import_file_dict(self.database, file, self.filelist[file][0], self.filelist[file][1], None)
                self.updatedsize[file] = stat_info.st_size
                self.updatedtime[file] = time()
        else:
            # First sighting: directories and files modified within the
            # last 60 seconds get size/time 0, so the next call treats them
            # as updated; older files are recorded as already up to date.
            if os.path.isdir(file) or (time() - stat_info.st_mtime) < 60:
                self.updatedsize[file] = 0
                self.updatedtime[file] = 0
            else:
                self.updatedsize[file] = stat_info.st_size
                self.updatedtime[file] = time()

    # Directories found inside the file list become monitored directories.
    # This used to be a filter() call run purely for its side effects,
    # which silently does nothing wherever filter() is lazy.
    for new_dir in list(self.addToDirList):
        self.addImportDirectory(new_dir, True, self.addToDirList[new_dir][0], self.addToDirList[new_dir][1])

    for file in self.removeFromFileList:
        if file in self.filelist:
            del self.filelist[file]

    self.addToDirList = {}
    self.removeFromFileList = {}
    self.database.rollback()
|
2008-12-06 20:50:40 +01:00
|
|
|
|
|
|
|
# This is now an internal function that should not be called directly.
|
2009-07-31 22:24:21 +02:00
|
|
|
def import_file_dict(self, db, file, site, filter, q=None):
    """Convert one hand history file with its site filter and import the result.

    This is now an internal function that should not be called directly.

    db     -- database object passed on to import_fpdb_file()
    file   -- path of the file to import; a directory is only remembered
              in self.addToDirList
    site   -- site name; also the sub-directory of the "hhArchiveBase"
              import parameter used for the converted file
    filter -- name of the converter module; the converter class is looked
              up under the same name with "ToFpdb" removed
    q      -- optional queue passed on to import_fpdb_file()

    Returns (stored, duplicates, partial, errors, ttime). A directory
    yields all zeroes; an unknown filter or a failed conversion yields
    (0, 0, 0, 1, 0).
    """
    if os.path.isdir(file):
        self.addToDirList[file] = [site] + [filter]
        # Used to be a bare return (None), which broke callers that unpack
        # the result.
        return (0, 0, 0, 0, 0)

    (stored, duplicates, partial, errors, ttime) = (0, 0, 0, 0, 0)

    file = file.decode(fpdb_simple.LOCALE_ENCODING)

    # Load filter, process file, pass returned filename to import_fpdb_file
    if self.settings['threads'] > 0 and self.writeq is not None and q is not None:
        log.info("Converting " + file + " (" + str(q.qsize()) + ")")
    else:
        log.info("Converting " + file)

    hhbase = self.config.get_import_parameters().get("hhArchiveBase")
    hhbase = os.path.expanduser(hhbase)
    hhdir = os.path.join(hhbase, site)
    try:
        # Prefix the output name with the name of the file's parent directory.
        out_path = os.path.join(hhdir, file.split(os.path.sep)[-2]+"-"+os.path.basename(file))
    except IndexError:
        # No parent directory in the path: use an "x" + date prefix instead.
        out_path = os.path.join(hhdir, "x"+strftime("%d-%m-%y")+os.path.basename(file))

    filter_name = filter.replace("ToFpdb", "")

    mod = __import__(filter)
    obj = getattr(mod, filter_name, None)
    if not callable(obj):
        log.warning("Unknown filter filter_name:'%s' in filter:'%s'" %(filter_name, filter))
        return (0, 0, 0, 1, 0)

    hhc = obj(in_path = file, out_path = out_path, index = 0) # Index into file 0 until changeover
    if not hhc.getStatus():
        # conversion didn't work
        # TODO: appropriate response?
        return (0, 0, 0, 1, 0)

    if self.NEWIMPORT == False:
        (stored, duplicates, partial, errors, ttime) = self.import_fpdb_file(db, out_path, site, q)
    elif self.NEWIMPORT == True:
        #This code doesn't do anything yet
        handlist = hhc.getProcessedHands()
        self.pos_in_file[file] = hhc.getLastCharacterRead()

        for hand in handlist:
            #try, except duplicates here?
            #hand.prepInsert()
            hand.insert(self.database)

        errors = hhc.numErrors
        stored = hhc.numHands
    else:
        # NEWIMPORT is neither True nor False: treated like a failed conversion.
        return (0, 0, 0, 1, 0)

    return (stored, duplicates, partial, errors, ttime)
|
2008-12-06 20:50:40 +01:00
|
|
|
|
|
|
|
|
2009-07-31 22:24:21 +02:00
|
|
|
def import_fpdb_file(self, db, file, site, q):
    """Read the new part of one converted file and import the hands in it.

    db   -- database object; committed after the lines were processed
    file -- path of the file, or the string "stdin" to read sys.stdin
    site -- site name passed on to import_fpdb_lines()
    q    -- queue passed on to import_fpdb_lines(), or None

    Reading starts at the position remembered in self.pos_in_file and the
    new end position is stored there afterwards. The id of the last hand
    is kept in self.handsId. A missing file is marked in
    self.removeFromFileList.

    Returns (stored, duplicates, partial, errors, seconds_taken); a missing
    file yields (0, 0, 0, 1, 0).
    """
    starttime = time()
    loc = 0

    if file == "stdin":
        inputFile = sys.stdin
    else:
        if not os.path.exists(file):
            self.removeFromFileList[file] = True
            return (0, 0, 0, 1, 0)
        inputFile = open(file, "rU")
        # Continue where the previous read of this file stopped.
        loc = self.pos_in_file.get(file, 0)

    # Read input file into class and close file (also when reading fails).
    try:
        inputFile.seek(loc)
        self.lines = fpdb_simple.removeTrailingEOL(inputFile.readlines())
        self.pos_in_file[file] = inputFile.tell()
    finally:
        inputFile.close()

    (stored, duplicates, partial, errors, ttime, handsId) = self.import_fpdb_lines(db, self.lines, starttime, file, site, q)

    db.commit()
    ttime = time() - starttime
    if q is None:
        log.info("Total stored: %(stored)d\tduplicates:%(duplicates)d\terrors:%(errors)d\ttime:%(ttime)s" % locals())

    if not stored:
        if duplicates:
            # Use the hand number of the last "Game #" line in the file.
            final_game_line = None
            for text in self.lines:
                if text.find("Game #") != -1:
                    final_game_line = text
            if final_game_line is None:
                # Used to raise NameError when no such line was present.
                handsId = 0
            else:
                handsId = fpdb_simple.parseSiteHandNo(final_game_line)
        else:
            print("failed to read a single hand from file: %s" % (inputFile,))
            handsId = 0
        #todo: this will cause return of an unstored hand number if the last hand was error
    self.handsId = handsId

    return (stored, duplicates, partial, errors, ttime)
# end def import_fpdb_file
|
2009-11-25 14:22:14 +01:00
|
|
|
|
2009-07-31 22:24:21 +02:00
|
|
|
|
|
|
|
def import_fpdb_lines(self, db, lines, starttime, file, site, q = None):
    """Import an fpdb hand history held in the list lines, could be one hand or many

    db        -- database object; committed after each stored hand and
                 rolled back after a duplicate or a failed hand
    lines     -- list of text lines; hands are separated by lines shorter
                 than two characters
    starttime -- start time, only used for the final timing message
    file      -- file name, only used in error messages
    site      -- key into self.siteIds
    q         -- optional queue passed on to fpdb_parse_logic.mainParser

    Returns (stored, duplicates, partial, errors, ttime, handsId). In this
    function partial and ttime are never changed from 0, and handsId is
    the value returned for the last hand that was parsed successfully.

    Exits the process with sys.exit(0) once the 'handCount' setting is
    reached; re-raises parse errors when the 'failOnError' setting is set.
    NOTE(review): the 'failOnError' and 'quiet' settings are read with []
    here and no default for them is visible in this file - confirm that
    callers always set them.
    """

    #db.lock_for_insert() # should be ok when using one thread, but doesn't help??
    # Let the GTK user interface process its pending events first.
    while gtk.events_pending():
        gtk.main_iteration(False)

    try: # sometimes we seem to be getting an empty self.lines, in which case, we just want to return.
        firstline = lines[0]
    except:
        # just skip the debug message and return silently:
        #print "DEBUG: import_fpdb_file: failed on lines[0]: '%s' '%s' '%s' '%s' " %( file, site, lines, loc)
        return (0,0,0,1,0,0)

    if "Tournament Summary" in firstline:
        print "TODO: implement importing tournament summaries"
        #self.faobs = readfile(inputFile)
        #self.parseTourneyHistory()
        return (0,0,0,1,0,0)

    category = fpdb_simple.recogniseCategory(firstline)

    startpos = 0
    stored = 0 #counter
    duplicates = 0 #counter
    partial = 0 #counter
    errors = 0 #counter
    ttime = 0
    handsId = 0

    for i in xrange(len(lines)):
        if len(lines[i]) < 2: #Weird way to detect for '\r\n' or '\n'
            # A separator line: everything since the previous separator is one hand.
            endpos = i
            hand = lines[startpos:endpos]

            # NOTE(review): hand is empty when the very first line is a
            # separator, so hand[0] would raise IndexError - confirm whether
            # that input can occur.
            if len(hand[0]) < 2:
                hand=hand[1:]

            if len(hand) < 3:
                pass
                #TODO: This is ugly - we didn't actually find the start of the
                # hand with the outer loop so we test again...
            else:
                isTourney = fpdb_simple.isTourney(hand[0])
                if not isTourney:
                    hand = fpdb_simple.filterAnteBlindFold(hand)
                # Kept on the instance because printEmailErrorMessage reads self.hand.
                self.hand = hand

                try:
                    handsId = fpdb_parse_logic.mainParser( self.settings, self.siteIds[site]
                                                         , category, hand, self.config
                                                         , db, q )
                    db.commit()

                    stored += 1
                    if self.callHud:
                        #print "call to HUD here. handsId:",handsId
                        #pipe the Hands.id out to the HUD
                        # print "fpdb_import: sending hand to hud", handsId, "pipe =", self.caller.pipe_to_hud
                        self.caller.pipe_to_hud.stdin.write("%s" % (handsId) + os.linesep)
                except Exceptions.DuplicateError:
                    duplicates += 1
                    db.rollback()
                except (ValueError), fe:
                    errors += 1
                    self.printEmailErrorMessage(errors, file, hand)

                    if (self.settings['failOnError']):
                        db.commit() #dont remove this, in case hand processing was cancelled.
                        raise
                    else:
                        db.rollback()
                except (fpdb_simple.FpdbError), fe:
                    errors += 1
                    self.printEmailErrorMessage(errors, file, hand)
                    db.rollback()

                    if self.settings['failOnError']:
                        db.commit() #dont remove this, in case hand processing was cancelled.
                        raise

                # Progress message every 'minPrint' processed hands.
                if self.settings['minPrint']:
                    if not ((stored+duplicates+errors) % self.settings['minPrint']):
                        print "stored:", stored, " duplicates:", duplicates, "errors:", errors

                # Stop the whole program once 'handCount' hands were processed.
                if self.settings['handCount']:
                    if ((stored+duplicates+errors) >= self.settings['handCount']):
                        if not self.settings['quiet']:
                            print "quitting due to reaching the amount of hands to be imported"
                            print "Total stored:", stored, "duplicates:", duplicates, "errors:", errors, " time:", (time() - starttime)
                        sys.exit(0)
            startpos = endpos
    return (stored, duplicates, partial, errors, ttime, handsId)
# end def import_fpdb_lines
|
2008-08-04 05:44:28 +02:00
|
|
|
|
2008-12-18 23:39:43 +01:00
|
|
|
def printEmailErrorMessage(self, errors, filename, line):
    """Report a hand that could not be imported and append it to hand-errors.txt.

    errors   -- running error number shown in the message
    filename -- name of the file that is being imported
    line     -- not used; the hand is taken from self.hand

    Prints the current traceback to stderr and a request for the hand to
    stdout, then appends the lines of self.hand plus an empty line to
    hand-errors.txt in the current directory.
    """
    traceback.print_exc(file=sys.stderr)
    print("Error No. %s , please send the hand causing this to steffen@sycamoretest.info so I can fix it." % (errors,))
    print("Filename: %s" % (filename,))
    print("Here is the first line so you can identify it. Please mention that the error was a ValueError:")
    print(self.hand[0])
    print("Hand logged to hand-errors.txt")

    logfile = open('hand-errors.txt', 'a')
    try:
        for s in self.hand:
            logfile.write(str(s) + "\n")
        logfile.write("\n")
    finally:
        # Close the log even when a write fails (the handle used to leak).
        logfile.close()
|
2008-08-04 05:44:28 +02:00
|
|
|
|
|
|
|
# Running this module directly only points the user to the real CLI script.
if __name__ == "__main__":
    print("CLI for fpdb_import is now available as CliFpdb.py")
|