2010-07-08 20:01:03 +02:00
|
|
|
#!/usr/bin/python
|
2010-07-02 23:48:01 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
2008-08-04 05:44:28 +02:00
|
|
|
|
2010-07-04 03:05:16 +02:00
|
|
|
#Copyright 2008-2010 Steffen Schaumburg
|
2008-08-04 05:44:28 +02:00
|
|
|
#This program is free software: you can redistribute it and/or modify
|
|
|
|
#it under the terms of the GNU Affero General Public License as published by
|
|
|
|
#the Free Software Foundation, version 3 of the License.
|
|
|
|
#
|
|
|
|
#This program is distributed in the hope that it will be useful,
|
|
|
|
#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
#GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
#You should have received a copy of the GNU Affero General Public License
|
|
|
|
#along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2010-07-04 03:05:16 +02:00
|
|
|
#In the "official" distribution you can find the license in agpl-3.0.txt.
|
2008-08-04 05:44:28 +02:00
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# Standard Library modules
|
|
|
|
|
|
|
|
import os # todo: remove this once import_dir is in fpdb_import
|
2008-08-04 05:44:28 +02:00
|
|
|
import sys
|
2009-11-30 15:08:30 +01:00
|
|
|
from time import time, strftime, sleep, clock
|
2009-02-07 16:06:48 +01:00
|
|
|
import traceback
|
|
|
|
import math
|
|
|
|
import datetime
|
|
|
|
import re
|
2009-07-31 22:24:21 +02:00
|
|
|
import Queue
|
|
|
|
from collections import deque # using Queue for now
|
|
|
|
import threading
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2010-02-01 22:03:51 +01:00
|
|
|
import logging
|
|
|
|
# logging has been set up in fpdb.py or HUD_main.py, use their settings:
|
|
|
|
log = logging.getLogger("importer")
|
|
|
|
|
2009-09-10 05:10:55 +02:00
|
|
|
import pygtk
|
|
|
|
import gtk
|
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# fpdb/FreePokerTools modules
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2009-06-26 00:14:32 +02:00
|
|
|
import Database
|
2009-02-07 16:06:48 +01:00
|
|
|
import Configuration
|
2009-09-11 07:12:46 +02:00
|
|
|
import Exceptions
|
2009-02-07 16:06:48 +01:00
|
|
|
|
2009-08-12 02:46:39 +02:00
|
|
|
|
2009-02-07 16:06:48 +01:00
|
|
|
# database interface modules
|
2008-09-16 23:19:50 +02:00
|
|
|
try:
|
2008-12-06 20:50:40 +01:00
|
|
|
import MySQLdb
|
2009-09-16 04:07:31 +02:00
|
|
|
except ImportError:
|
2009-09-16 03:10:18 +02:00
|
|
|
log.debug("Import database module: MySQLdb not found")
|
|
|
|
else:
|
|
|
|
mysqlLibFound = True
|
2009-11-25 14:22:14 +01:00
|
|
|
|
2008-09-16 23:19:50 +02:00
|
|
|
try:
|
2008-12-06 20:50:40 +01:00
|
|
|
import psycopg2
|
2009-09-16 04:07:31 +02:00
|
|
|
except ImportError:
|
2009-09-16 03:10:18 +02:00
|
|
|
log.debug("Import database module: psycopg2 not found")
|
|
|
|
else:
|
2009-07-31 22:24:21 +02:00
|
|
|
import psycopg2.extensions
|
2009-06-06 17:17:49 +02:00
|
|
|
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
|
2008-09-16 23:19:50 +02:00
|
|
|
|
2008-10-08 19:36:08 +02:00
|
|
|
class Importer:
    """Feed poker hand-history files into the fpdb database.

    Files are registered with addImportFile(), addImportDirectory() or
    addBulkImportImportFileOrDir() and then processed either in one go
    (runImport(), called from GuiBulkImport) or incrementally
    (runUpdated(), called from GuiAutoImport).

    Bookkeeping attributes:
        filelist           -- filename -> [site, filter]
        dirlist            -- site -> [directory, filter]; one directory per site
        siteIds            -- site -> id returned by self.database.get_site_id()
        addToDirList       -- directories met while importing, path -> [site, filter]
        removeFromFileList -- files that disappeared since they were registered
        updatedsize / updatedtime -- per-file size / timestamp seen by runUpdated()
        pos_in_file        -- per-file read position handed to the converter
    """

    def __init__(self, caller, settings, config, sql = None):
        """Store collaborators, fill in default settings and open DB handles.

        caller   -- owner of the importer; runUpdated() calls caller.addText()
                    and import_file_dict() writes to caller.pipe_to_hud
        settings -- dict of import options; missing keys are filled in with
                    defaults IN PLACE, so the caller's dict is modified
        config   -- configuration object (get_import_parameters(), hhcs)
        sql      -- passed unchanged to every Database.Database() created here
        """
        self.settings = settings
        self.caller = caller
        self.config = config
        self.sql = sql

        self.filelist = {}
        self.dirlist = {}
        self.siteIds = {}
        self.addToDirList = {}
        self.removeFromFileList = {}    # to remove deleted files
        self.monitor = False
        self.updatedsize = {}
        self.updatedtime = {}
        self.lines = None
        self.faobs = None               # File as one big string
        self.pos_in_file = {}           # dict to remember how far we have read in the file

        # Set defaults
        self.callHud = self.config.get_import_parameters().get("callFpdbHud")

        # CONFIGURATION OPTIONS
        self.settings.setdefault("minPrint", 30)
        self.settings.setdefault("handCount", 0)
        self.settings.setdefault("writeQSize", 1000)    # no need to change
        self.settings.setdefault("writeQMaxWait", 10)   # not used
        self.settings.setdefault("dropIndexes", "don't drop")
        self.settings.setdefault("dropHudCache", "don't drop")
        self.settings.setdefault("starsArchive", False)

        self.writeq = None
        self.database = Database.Database(self.config, sql = self.sql)
        self.writerdbs = []
        self.settings.setdefault("threads", 1)          # value set by GuiBulkImport
        for i in xrange(self.settings['threads']):
            self.writerdbs.append( Database.Database(self.config, sql = self.sql) )

        clock() # init clock in windows

    #Set functions
    def setCallHud(self, value):
        """Set whether newly stored hands are announced to the HUD."""
        self.callHud = value

    def setMinPrint(self, value):
        """Store the 'minPrint' setting, coerced to int."""
        self.settings['minPrint'] = int(value)

    def setHandCount(self, value):
        """Store the 'handCount' setting, coerced to int."""
        self.settings['handCount'] = int(value)

    def setQuiet(self, value):
        """Store the 'quiet' setting."""
        self.settings['quiet'] = value

    def setFailOnError(self, value):
        """Store the 'failOnError' setting."""
        self.settings['failOnError'] = value

    def setHandsInDB(self, value):
        """Store the number of hands in the database.

        When set, calculate_auto()/calculate_auto2() use this value instead
        of counting the Hands table themselves.
        """
        self.settings['handsInDB'] = value

    def setThreads(self, value):
        """Set the writer-thread count and open any missing writer DB handles.

        Existing handles are never closed here, so lowering the count only
        changes how many of them runImport() uses.
        """
        self.settings['threads'] = value
        missing = self.settings['threads'] - len(self.writerdbs)
        for i in xrange(missing):       # empty range when nothing is missing
            self.writerdbs.append( Database.Database(self.config, sql = self.sql) )

    def setDropIndexes(self, value):
        """Store the 'dropIndexes' setting ("drop", "don't drop" or "auto")."""
        self.settings['dropIndexes'] = value

    def setDropHudCache(self, value):
        """Store the 'dropHudCache' setting ("drop", "don't drop" or "auto")."""
        self.settings['dropHudCache'] = value

    def setStarsArchive(self, value):
        """Store the 'starsArchive' flag that is passed to the converter."""
        self.settings['starsArchive'] = value

    def clearFileList(self):
        """Forget every registered file and its per-file bookkeeping."""
        self.updatedsize = {}
        # The original assigned to 'self.updatetime' (typo), which left the
        # real timestamp map populated after a clear.
        self.updatedtime = {}
        self.pos_in_file = {}
        self.filelist = {}

    def closeDBs(self):
        """Disconnect the main database handle and every writer handle."""
        self.database.disconnect()
        for writerdb in self.writerdbs:
            writerdb.disconnect()

    def _to_unicode(self, path):
        """Return path as unicode, decoding byte strings as UTF-8.

        Values that are already unicode are returned untouched; decoding
        them again raises for unicode input (TypeError from unicode(),
        UnicodeEncodeError from .decode() on non-ASCII text).
        """
        if isinstance(path, unicode):
            return path
        return path.decode('utf-8')

    #Add an individual file to filelist
    def addImportFile(self, filename, site = "default", filter = "passthrough"):
        """Register one file for import.

        Does nothing when the file is already registered or does not exist.
        The first time a site is seen its database id is looked up and
        cached in self.siteIds; a missing or ambiguous id is only logged,
        the file stays registered.
        """
        #TODO: test it is a valid file -> put that in config!!
        if filename in self.filelist or not os.path.exists(self._to_unicode(filename)):
            return

        self.filelist[filename] = [site] + [filter]
        if site in self.siteIds:
            return

        # Get id from Sites table in DB
        result = self.database.get_site_id(site)
        if len(result) == 1:
            self.siteIds[site] = result[0][0]
        elif len(result) == 0:
            log.error("Database ID for %s not found" % site)
        else:
            log.error("[ERROR] More than 1 Database ID found for %s - Multiple currencies not implemented yet" % site)

    # Called from GuiBulkImport to add a file or directory.
    def addBulkImportImportFileOrDir(self, inputPath, site = "PokerStars"):
        """Add a file or directory for bulk import.

        The converter name is taken from self.config.hhcs[site].converter.
        A directory is walked recursively and every file found is
        registered; anything else is registered as a single file.
        Bulk import never monitors.
        """
        converter = self.config.hhcs[site].converter
        # TODO: only add sane files?
        if os.path.isdir(inputPath):
            for dirpath, dirnames, filenames in os.walk(inputPath):
                for name in filenames:
                    self.addImportFile(os.path.join(dirpath, name), site=site,
                                       filter=converter)
        else:
            self.addImportFile(inputPath, site=site, filter=converter)

    #Add a directory of files to filelist
    #Only one import directory per site supported.
    #dirlist is a hash of lists:
    #dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
    def addImportDirectory(self,dir,monitor=False, site="default", filter="passthrough"):
        """Register every entry of a directory (non-recursive).

        Gets called by GuiAutoImport. With monitor=True the directory is
        also remembered in self.dirlist so runUpdated() rescans it. A path
        that is not a directory is logged and ignored.
        """
        #This should really be using os.walk
        #http://docs.python.org/library/os.html
        if not os.path.isdir(dir):
            log.warning("Attempted to add non-directory: '%s' as an import directory" % str(dir))
            return

        if monitor == True:
            self.monitor = True
            self.dirlist[site] = [dir] + [filter]

        for entry in os.listdir(dir):
            self.addImportFile(os.path.join(dir, entry), site, filter)

    def runImport(self):
        """Run full import on self.filelist. This is called from GuiBulkImport.py

        Returns (stored, duplicates, partial, errors, elapsed_seconds).
        "auto" values of the dropIndexes/dropHudCache settings are resolved
        with calculate_auto2() and written back into self.settings.
        """
        # Initial setup
        start = datetime.datetime.now()
        starttime = time()
        log.info("Started at %s -- %d files to import. indexes: %s" % (start, len(self.filelist), self.settings['dropIndexes']))
        if self.settings['dropIndexes'] == 'auto':
            self.settings['dropIndexes'] = self.calculate_auto2(self.database, 12.0, 500.0)
        if self.settings.get('dropHudCache') == 'auto':
            self.settings['dropHudCache'] = self.calculate_auto2(self.database, 25.0, 500.0)    # returns "drop"/"don't drop"

        if self.settings['dropIndexes'] == 'drop':
            self.database.prepareBulkImport()
        else:
            log.debug("No need to drop indexes.")

        if self.settings['threads'] <= 0:
            (totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, None)
        else:
            # create queue (will probably change to deque at some point):
            self.writeq = Queue.Queue( self.settings['writeQSize'] )
            # start separate thread(s) to read hands from queue and write to db:
            for i in xrange(self.settings['threads']):
                t = threading.Thread( target=self.writerdbs[i].insert_queue_hands
                                    , args=(self.writeq, self.settings["writeQMaxWait"])
                                    , name="dbwriter-"+str(i) )
                t.setDaemon(True)
                t.start()
            # read hands and write to q:
            (totstored, totdups, totpartial, toterrors) = self.importFiles(self.database, self.writeq)
            self._wait_for_writers()

        # Tidying up after import
        if self.settings['dropIndexes'] == 'drop':
            self.database.afterBulkImport()
        else:
            print("No need to rebuild indexes.")
        if self.settings.get('dropHudCache') == 'drop':
            self.database.rebuild_hudcache()
        else:
            print("No need to rebuild hudcache.")
        self.database.analyzeDB()
        endtime = time()
        return (totstored, totdups, totpartial, toterrors, endtime-starttime)
    # end def runImport

    def _wait_for_writers(self):
        """Wait until self.writeq is empty or only one thread is left alive.

        Pending GTK events are processed between half-second naps so the
        GUI keeps repainting while waiting.
        """
        if self.writeq.empty():
            print("writers finished already")
            return

        print("waiting for writers to finish ...")
        # Polling empty() instead of writeq.join(): the original author noted
        # that "using empty() might be more reliable".
        while not self.writeq.empty() and len(threading.enumerate()) > 1:
            # TODO: Do we need to actually tell the progress indicator to move, or is it already moving, and we just need to process events...
            while gtk.events_pending(): # see http://faq.pygtk.org/index.py?req=index for more hints (3.7)
                gtk.main_iteration(False)
            sleep(0.5)
        print(" ... writers finished")

    def importFiles(self, db, q):
        """Pass every file in self.filelist to import_file_dict().

        db -- database object used to send the finish messages
        q  -- write queue handed on to import_file_dict(); runImport()
              passes None when settings['threads'] <= 0

        After the files are processed one finish message per configured
        thread is sent through db.send_finish_msg(q).
        Returns (stored, duplicates, partial, errors) summed over all files.
        """
        totstored = 0
        totdups = 0
        totpartial = 0
        toterrors = 0
        for file in self.filelist:
            (stored, duplicates, partial, errors, ttime) = self.import_file_dict(db, file
                                               ,self.filelist[file][0], self.filelist[file][1], q)
            totstored += stored
            totdups += duplicates
            totpartial += partial
            toterrors += errors

        for i in xrange( self.settings['threads'] ):
            print("sending finish msg qlen = %s" % (q.qsize(),))
            db.send_finish_msg(q)

        return (totstored, totdups, totpartial, toterrors)
    # end def importFiles

    def _ensure_hands_in_db(self, db):
        """Fill settings['handsInDB'] from the Hands table if it is not set.

        A failing query is logged and otherwise ignored, exactly as before
        (the callers then fail with KeyError on the missing setting); only
        the silent bare 'except' has been replaced.
        """
        if 'handsInDB' in self.settings:
            return
        try:
            tmpcursor = db.get_cursor()
            tmpcursor.execute("Select count(1) from Hands;")
            self.settings['handsInDB'] = tmpcursor.fetchone()[0]
        except Exception:
            # if this fails we're probably doomed anyway
            log.exception("Could not count hands in database")

    # not used currently
    def calculate_auto(self, db):
        """An heuristic to determine a reasonable value of drop/don't drop

        Returns "don't drop" for a single file, "drop" for fewer than 5000
        hands in the db, "don't drop" for fewer than 50 files or more than
        50000 hands, and "drop" otherwise (checked in that order).
        """
        if len(self.filelist) == 1:
            return "don't drop"
        self._ensure_hands_in_db(db)
        if self.settings['handsInDB'] < 5000:
            return "drop"
        if len(self.filelist) < 50:
            return "don't drop"
        if self.settings['handsInDB'] > 50000:
            return "don't drop"
        return "drop"

    def calculate_auto2(self, db, scale, increment):
        """A second heuristic to determine a reasonable value of drop/don't drop

        This one adds up size of files to import to guess number of hands in them
        Example values of scale and increment params might be 10 and 500 meaning
        roughly: drop if importing more than 10% (100/scale) of hands in db or if
        less than 500 hands in db

        Returns "drop" or "don't drop".
        """
        size_per_hand = 1300.0      # wag based on a PS 6-up FLHE file. Actual value not hugely important
                                    # as values of scale and increment compensate for it anyway.
                                    # decimal used to force float arithmetic

        # get number of hands in db
        self._ensure_hands_in_db(db)

        # add up size of import files (files that vanished are skipped)
        total_size = 0.0
        for path in self.filelist:
            if os.path.exists(path):
                total_size += os.stat(path).st_size

        # if hands_in_db is zero or very low, we want to drop indexes, otherwise compare
        # import size with db size somehow:
        if self.settings['handsInDB'] < scale * (total_size/size_per_hand) + increment:
            return "drop"
        return "don't drop"

    #Run import on updated files, then store latest update time. Called from GuiAutoImport.py
    def runUpdated(self):
        """Rescan monitored directories and import files that changed.

        A file seen for the first time is only recorded: with size/time 0
        when it is a directory or was modified less than 60 seconds ago (so
        it is imported on the next call), otherwise with its current size
        (so only later growth is imported). Files that no longer exist are
        dropped from self.filelist. Ends with self.database.rollback().
        """
        #Check for new files in monitored directories
        #todo: make efficient - always checks for new file, should be able to use mtime of directory
        # ^^ May not work on windows
        for site in self.dirlist:
            self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])

        for file in self.filelist:
            if not os.path.exists(file):
                self.removeFromFileList[file] = True
                continue

            stat_info = os.stat(file)
            if file not in self.updatedsize:
                if os.path.isdir(file) or (time() - stat_info.st_mtime) < 60:
                    self.updatedsize[file] = 0
                    self.updatedtime[file] = 0
                else:
                    self.updatedsize[file] = stat_info.st_size
                    self.updatedtime[file] = time()
                continue

            # we should be able to assume that if we're in size, we're in time as well
            if stat_info.st_size > self.updatedsize[file] or stat_info.st_mtime > self.updatedtime[file]:
                try:
                    if not os.path.isdir(file):
                        self.caller.addText("\n"+os.path.basename(file))
                except KeyError: # TODO: What error happens here?
                    pass
                (stored, duplicates, partial, errors, ttime) = self.import_file_dict(self.database, file, self.filelist[file][0], self.filelist[file][1], None)
                try:
                    if not os.path.isdir(file): # Note: This assumes that whatever calls us has an "addText" func
                        self.caller.addText(" %d stored, %d duplicates, %d partial, %d errors (time = %f)" % (stored, duplicates, partial, errors, ttime))
                except KeyError: # TODO: Again, what error happens here? fix when we find out ..
                    pass
                self.updatedsize[file] = stat_info.st_size
                self.updatedtime[file] = time()

        # Directories that import_file_dict() came across become monitored
        # import directories (previously done via filter() for side effects).
        for path in list(self.addToDirList):
            site_and_filter = self.addToDirList[path]
            self.addImportDirectory(path, True, site_and_filter[0], site_and_filter[1])

        for file in self.removeFromFileList:
            if file in self.filelist:
                del self.filelist[file]

        self.addToDirList = {}
        self.removeFromFileList = {}
        self.database.rollback()

    # This is now an internal function that should not be called directly.
    def import_file_dict(self, db, file, site, filter, q=None):
        """Convert one hand-history file and store its hands.

        db     -- accepted for interface compatibility; all database work in
                  this method goes through self.database
        file   -- path of the file; a directory is only queued in
                  self.addToDirList
        site   -- site name, used for the archive sub-directory
        filter -- name of the converter module; the converter class is
                  expected under the same name without "ToFpdb"
        q      -- write queue, only used to log its length

        Returns (stored, duplicates, partial, errors, seconds):
        (0,0,0,0,0) for a directory, and (0,0,0,1,seconds) when the
        converter is unknown or reports a failed status.
        """
        if os.path.isdir(file):
            self.addToDirList[file] = [site] + [filter]
            return (0,0,0,0,0)

        (stored, duplicates, partial, errors) = (0, 0, 0, 0)
        started = time()

        file = self._to_unicode(file) #(Configuration.LOCALE_ENCODING)

        # Load filter, process file, pass returned filename to import_fpdb_file
        # Test q itself: runUpdated() passes q=None even when self.writeq is
        # still set from an earlier bulk import.
        if self.settings['threads'] > 0 and q is not None:
            log.info("Converting " + file + " (" + str(q.qsize()) + ")")
        else:
            log.info("Converting " + file)

        hhbase = self.config.get_import_parameters().get("hhArchiveBase")
        hhbase = os.path.expanduser(hhbase)
        hhdir = os.path.join(hhbase,site)
        try:
            out_path = os.path.join(hhdir, file.split(os.path.sep)[-2]+"-"+os.path.basename(file))
        except IndexError:
            # no parent directory component in the path
            out_path = os.path.join(hhdir, "x"+strftime("%d-%m-%y")+os.path.basename(file))

        filter_name = filter.replace("ToFpdb", "")
        mod = __import__(filter)
        obj = getattr(mod, filter_name, None)
        if not callable(obj):
            log.warning("Unknown filter filter_name:'%s' in filter:'%s'" %(filter_name, filter))
            return (0, 0, 0, 1, time() - started)

        idx = 0
        if file in self.pos_in_file:
            idx = self.pos_in_file[file]
        else:
            self.pos_in_file[file] = 0
        hhc = obj(self.config, in_path = file, out_path = out_path, index = idx, starsArchive = self.settings['starsArchive'])
        if not hhc.getStatus():
            # conversion didn't work
            # TODO: appropriate response?
            return (0, 0, 0, 1, time() - started)

        handlist = hhc.getProcessedHands()
        self.pos_in_file[file] = hhc.getLastCharacterRead()
        to_hud = []

        for hand in handlist:
            if hand is None: # TODO: Treat empty as an error, or just ignore?
                log.error("Hand processed but empty")
                continue
            hand.prepInsert(self.database)
            try:
                hand.insert(self.database)
            except Exceptions.FpdbHandDuplicate:
                duplicates += 1
            else:
                if self.callHud and hand.dbid_hands != 0:
                    to_hud.append(hand.dbid_hands)

        # Call hudcache update if not in bulk import mode
        # FIXME: Need to test for bulk import that isn't rebuilding the cache
        if self.callHud:
            for hand in handlist:
                if hand is not None and not hand.is_duplicate:
                    hand.updateHudCache(self.database)
        self.database.commit()

        #pipe the Hands.id out to the HUD
        for hid in to_hud:
            # report the id being sent (the original printed the id of the
            # last hand processed for every entry)
            print("fpdb_import: sending hand to hud %s pipe = %s" % (hid, self.caller.pipe_to_hud))
            self.caller.pipe_to_hud.stdin.write("%s" % (hid) + os.linesep)

        errors = hhc.numErrors
        stored = hhc.numHands
        stored -= duplicates
        stored -= errors

        return (stored, duplicates, partial, errors, time() - started)

    def printEmailErrorMessage(self, errors, filename, line):
        """Print the current traceback plus a bug-report request and log the hand.

        errors   -- running error number shown in the message
        filename -- file the failing hand came from
        line     -- used as the hand text when self.hand is missing or empty
                    (NOTE(review): nothing in this class sets self.hand -
                    confirm where callers expect it to come from)

        The hand is appended to 'hand-errors.txt' in the current directory.
        """
        traceback.print_exc(file=sys.stderr)
        print("Error No. %s , please send the hand causing this to fpdb-main@lists.sourceforge.net so we can fix the problem." % (errors,))
        print("Filename: %s" % (filename,))
        print("Here is the first line of the hand so you can identify it. Please mention that the error was a ValueError:")

        hand_lines = getattr(self, 'hand', None)
        if not hand_lines:
            hand_lines = [line]
        print(hand_lines[0])
        print("Hand logged to hand-errors.txt")

        logfile = open('hand-errors.txt', 'a')
        try:
            for s in hand_lines:
                logfile.write(str(s) + "\n")
            logfile.write("\n")
        finally:
            # close even when a write fails
            logfile.close()
|
2008-08-04 05:44:28 +02:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # This module is a library only; running it directly just points the
    # user at the command-line front end.
    print("CLI for fpdb_import is now available as CliFpdb.py")
|