#!/usr/bin/env python
# -*- coding: utf-8 -*-

#Copyright 2008-2011 Steffen Schaumburg
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in agpl-3.0.txt.

import L10n
_ = L10n.get_translation()

# Standard Library modules
import os  # todo: remove this once import_dir is in fpdb_import
import sys
from time import time, strftime, sleep, clock
import traceback
import math
import datetime
import re
import Queue
from collections import deque # using Queue for now
import threading

import logging
# logging has been set up in fpdb.py or HUD_main.py, use their settings:
log = logging.getLogger("importer")

import pygtk
import gtk

# fpdb/FreePokerTools modules
import Database
import Configuration
import Exceptions

# database interface modules
try:
    import MySQLdb
except ImportError:
    log.debug(_("Import database module: MySQLdb not found"))
else:
    mysqlLibFound = True

try:
    import psycopg2
except ImportError:
    log.debug(_("Import database module: psycopg2 not found"))
else:
    import psycopg2.extensions
    psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)


class Importer:
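    """Imports hand history files into the fpdb database, either as a one-off
    bulk import or by monitoring directories for new hands (auto-import).

    A minimal usage sketch (the path and site name are illustrative):

        imp = Importer(caller, settings, config)
        imp.setThreads(1)
        imp.addBulkImportImportFileOrDir("/path/to/hands", site="PokerStars")
        (stored, dups, partial, errs, elapsed) = imp.runImport()
    """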
    def __init__(self, caller, settings, config, sql = None, parent = None):
        """Constructor"""
        self.settings = settings
        self.caller = caller
        self.config = config
        self.sql = sql
        self.parent = parent

        #log = Configuration.get_logger("logging.conf", "importer", log_dir=self.config.dir_log)
        self.filelist = {}
        self.dirlist = {}
        self.siteIds = {}
        self.addToDirList = {}
        self.removeFromFileList = {} # to remove deleted files
        self.monitor = False
        self.updatedsize = {}
        self.updatedtime = {}
        self.lines = None
        self.faobs = None # File as one big string
        self.pos_in_file = {} # dict to remember how far we have read in the file

        #Set defaults
        self.callHud = self.config.get_import_parameters().get("callFpdbHud")

        # CONFIGURATION OPTIONS
        self.settings.setdefault("handCount", 0)
        #self.settings.setdefault("allowHudcacheRebuild", True) # NOT USED NOW
        #self.settings.setdefault("forceThreads", 2) # NOT USED NOW
        self.settings.setdefault("writeQSize", 1000) # no need to change
        self.settings.setdefault("writeQMaxWait", 10) # not used
        self.settings.setdefault("dropIndexes", "don't drop")
        self.settings.setdefault("dropHudCache", "don't drop")
        self.settings.setdefault("starsArchive", False)
        self.settings.setdefault("ftpArchive", False)
        self.settings.setdefault("testData", False)
        self.settings.setdefault("cacheHHC", False)

        self.writeq = None
        self.database = Database.Database(self.config, sql = self.sql)
        self.writerdbs = []
        self.settings.setdefault("threads", 1) # value set by GuiBulkImport
        for i in xrange(self.settings['threads']):
            self.writerdbs.append( Database.Database(self.config, sql = self.sql) )

        clock() # init clock in windows
    #Set functions
    def setCallHud(self, value):
        self.callHud = value

    def setCacheSessions(self, value):
        self.cacheSessions = value

    def setHandCount(self, value):
        self.settings['handCount'] = int(value)

    def setQuiet(self, value):
        self.settings['quiet'] = value

    def setFailOnError(self, value):
        self.settings['failOnError'] = value

    def setHandsInDB(self, value):
        self.settings['handsInDB'] = value

    def setThreads(self, value):
        self.settings['threads'] = value
        if self.settings["threads"] > len(self.writerdbs):
            for i in xrange(self.settings['threads'] - len(self.writerdbs)):
                self.writerdbs.append( Database.Database(self.config, sql = self.sql) )

    def setDropIndexes(self, value):
        self.settings['dropIndexes'] = value

    def setDropHudCache(self, value):
        self.settings['dropHudCache'] = value

    def setStarsArchive(self, value):
        self.settings['starsArchive'] = value

    def setFTPArchive(self, value):
        self.settings['ftpArchive'] = value

    def setPrintTestData(self, value):
        self.settings['testData'] = value

    def setFakeCacheHHC(self, value):
        self.settings['cacheHHC'] = value

    def getCachedHHC(self):
        return self.handhistoryconverter

    # def setWatchTime(self):
    #     self.updated = time()
    def clearFileList(self):
        self.updatedsize = {}
        self.updatedtime = {}
        self.pos_in_file = {}
        self.filelist = {}
    def closeDBs(self):
        self.database.disconnect()
        for i in xrange(len(self.writerdbs)):
            self.writerdbs[i].disconnect()

    def logImport(self, type, file, stored, dups, partial, errs, ttime, id):
        hands = stored + dups + partial + errs
        now = datetime.datetime.utcnow()
        ttime100 = ttime * 100
        self.database.updateFile([type, now, now, hands, stored, dups, partial, errs, ttime100, True, id])

    def addFileToList(self, file, site, filter):
        now = datetime.datetime.utcnow()
        file = os.path.splitext(os.path.basename(file))[0]
        try: #TODO: this is a dirty hack. GBI needs it, GAI fails with it.
            file = unicode(file, "utf8", "replace")
        except TypeError:
            pass
        id = self.database.storeFile([file, site, now, now, 0, 0, 0, 0, 0, 0, False])
        self.database.commit()
        return [site] + [filter] + [id]
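
    # Each self.filelist entry built from addFileToList() is the list
    # [site, filter, fileId]; importFiles(), runUpdated() and
    # import_file_dict() index those positions directly as
    # self.filelist[file][0..2].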
    #Add an individual file to filelist
    def addImportFile(self, filename, site = "default", filter = "passthrough"):
        #TODO: test it is a valid file -> put that in config!!
        #print "addimportfile: filename is a", filename.__class__
        # filename not guaranteed to be unicode
        if filename in self.filelist or not os.path.exists(filename):
            return
        self.filelist[filename] = self.addFileToList(filename, site, filter)
        if site not in self.siteIds:
            # Get id from Sites table in DB
            result = self.database.get_site_id(site)
            if len(result) == 1:
                self.siteIds[site] = result[0][0]
            else:
                if len(result) == 0:
                    log.error(_("Database ID for %s not found") % site)
                else:
                    log.error(_("[ERROR] More than 1 Database ID found for %s - Multiple currencies not implemented yet") % site)

    # Called from GuiBulkImport to add a file or directory.
    def addBulkImportImportFileOrDir(self, inputPath, site = "PokerStars"):
        """Add a file or directory for bulk import"""
        filter = self.config.hhcs[site].converter
        # Bulk import never monitors
        # if directory, add all files in it. Otherwise add single file.
        # TODO: only add sane files?
        if os.path.isdir(inputPath):
            for subdir in os.walk(inputPath):
                for file in subdir[2]:
                    self.addImportFile(os.path.join(subdir[0], file), site=site, filter=filter)
        else:
            self.addImportFile(inputPath, site=site, filter=filter)

    #Add a directory of files to filelist
    #Only one import directory per site supported.
    #dirlist is a hash of lists:
    #dirlist{ 'PokerStars' => ["/path/to/import/", "filtername"] }
    def addImportDirectory(self, dir, monitor=False, site="default", filter="passthrough"):
        #gets called by GuiAutoImport.
        #This should really be using os.walk
        #http://docs.python.org/library/os.html
        if os.path.isdir(dir):
            if monitor:
                self.monitor = True
                self.dirlist[site] = [dir] + [filter]

            #print "addImportDirectory: checking files in", dir
            for file in os.listdir(dir):
                #print " adding file ", file
                self.addImportFile(os.path.join(dir, file), site, filter)
        else:
            log.warning(_("Attempted to add non-directory '%s' as an import directory") % str(dir))
    def runImport(self):
        """Run full import on self.filelist. This is called from GuiBulkImport.py"""
        #if self.settings['forceThreads'] > 0:  # use forceThreads until threading enabled in GuiBulkImport
        #    self.setThreads(self.settings['forceThreads'])

        # Initial setup
        start = datetime.datetime.now()
        starttime = time()
        log.info(_("Started at %s -- %d files to import. indexes: %s") % (start, len(self.filelist), self.settings['dropIndexes']))
        if self.settings['dropIndexes'] == 'auto':
            self.settings['dropIndexes'] = self.calculate_auto2(self.database, 12.0, 500.0)
        if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'auto':
            self.settings['dropHudCache'] = self.calculate_auto2(self.database, 25.0, 500.0) # returns "drop"/"don't drop"

        if self.settings['dropIndexes'] == 'drop':
            self.database.prepareBulkImport()
        else:
            log.info(_("No need to drop indexes."))
        #print "dropInd =", self.settings['dropIndexes'], " dropHudCache =", self.settings['dropHudCache']

        if self.settings['threads'] <= 0:
            (totstored, totdups, totpartial, toterrors) = self.importFiles(None)
        else:
            # create queue (will probably change to deque at some point):
            self.writeq = Queue.Queue( self.settings['writeQSize'] )
            # start separate thread(s) to read hands from queue and write to db:
            for i in xrange(self.settings['threads']):
                t = threading.Thread( target=self.writerdbs[i].insert_queue_hands
                                    , args=(self.writeq, self.settings["writeQMaxWait"])
                                    , name="dbwriter-"+str(i) )
                t.setDaemon(True)
                t.start()
            # read hands and write to q:
            (totstored, totdups, totpartial, toterrors) = self.importFiles(self.writeq)

            if self.writeq.empty():
                print _("writers finished already")
            else:
                print _("waiting for writers to finish ...")
                #for t in threading.enumerate():
                #    print "    "+str(t)
                #self.writeq.join()
                #using empty() might be more reliable:
                while not self.writeq.empty() and len(threading.enumerate()) > 1:
                    # TODO: Do we need to actually tell the progress indicator to move, or is it already moving, and we just need to process events...
                    while gtk.events_pending(): # see http://faq.pygtk.org/index.py?req=index for more hints (3.7)
                        gtk.main_iteration(False)
                    sleep(0.5)
                print _(" ... writers finished")

        # Tidying up after import
        if self.settings['dropIndexes'] == 'drop':
            self.database.afterBulkImport()
        else:
            log.info(_("No need to rebuild indexes."))
        if 'dropHudCache' in self.settings and self.settings['dropHudCache'] == 'drop':
            self.database.rebuild_hudcache()
        else:
            log.info(_("No need to rebuild hudcache."))
        self.database.analyzeDB()
        endtime = time()
        return (totstored, totdups, totpartial, toterrors, endtime - starttime)
    # end def runImport
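
    # Threaded write design in runImport (a sketch of what the code above
    # does): importFiles() acts as the producer, parsing hands and queueing
    # them on self.writeq, while each "dbwriter-N" thread consumes the queue
    # through Database.insert_queue_hands(); importFiles() then sends one
    # finish message per writer thread so the writers can drain and exit.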
    def importFiles(self, q):
        """Read filenames in self.filelist and pass to import_file_dict().
           Uses a separate database connection if created as a thread (caller
           passes None or no param as db)."""

        totstored = 0
        totdups = 0
        totpartial = 0
        toterrors = 0
        tottime = 0

        #prepare progress popup window
        ProgressDialog = ProgressBar(len(self.filelist), self.parent)

        for file in self.filelist:

            ProgressDialog.progress_update(file)

            (stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0]
                                                           ,self.filelist[file][1], self.filelist[file][2], q)
            totstored += stored
            totdups += duplicates
            totpartial += partial
            toterrors += errors

            self.logImport('bulk', file, stored, duplicates, partial, errors, ttime, self.filelist[file][2])
            self.database.commit()
        del ProgressDialog

        for i in xrange( self.settings['threads'] ):
            print _("sending finish message queue length ="), q.qsize()
            self.writerdbs[i].send_finish_msg(q) # one finish message per writer thread

        return (totstored, totdups, totpartial, toterrors)
    # end def importFiles

    # not used currently
    def calculate_auto(self, db):
        """A heuristic to determine a reasonable value of drop/don't drop"""
        if len(self.filelist) == 1: return "don't drop"
        if 'handsInDB' not in self.settings:
            try:
                tmpcursor = db.get_cursor()
                tmpcursor.execute("Select count(1) from Hands;")
                self.settings['handsInDB'] = tmpcursor.fetchone()[0]
            except:
                pass # if this fails we're probably doomed anyway
        if self.settings['handsInDB'] < 5000: return "drop"
        if len(self.filelist) < 50: return "don't drop"
        if self.settings['handsInDB'] > 50000: return "don't drop"
        return "drop"
    def calculate_auto2(self, db, scale, increment):
        """A second heuristic to determine a reasonable value of drop/don't drop.
           This one adds up the sizes of the files to import to estimate the number
           of hands in them. Example values of the scale and increment params might
           be 10 and 500, meaning roughly: drop if importing more than 10%
           (100/scale) of the hands in the db, or if there are fewer than 500 hands
           in the db."""
        size_per_hand = 1300.0 # wag based on a PS 6-up FLHE file. Actual value not hugely important
                               # as values of scale and increment compensate for it anyway.
                               # decimal used to force float arithmetic

        # get number of hands in db
        if 'handsInDB' not in self.settings:
            try:
                tmpcursor = db.get_cursor()
                tmpcursor.execute("Select count(1) from Hands;")
                self.settings['handsInDB'] = tmpcursor.fetchone()[0]
            except:
                pass # if this fails we're probably doomed anyway

        # add up size of import files
        total_size = 0.0
        for file in self.filelist:
            if os.path.exists(file):
                stat_info = os.stat(file)
                total_size += stat_info.st_size

        # if hands_in_db is zero or very low, we want to drop indexes, otherwise compare
        # import size with db size somehow:
        ret = "don't drop"
        if self.settings['handsInDB'] < scale * (total_size/size_per_hand) + increment:
            ret = "drop"
        #print "auto2: handsindb =", self.settings['handsInDB'], "total_size =", total_size, "size_per_hand =", \
        #      size_per_hand, "inc =", increment, "return:", ret
        return ret
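
    # Worked example for calculate_auto2 (illustrative numbers): with the
    # values used by runImport, scale=12.0 and increment=500.0, importing
    # 1.3 MB of files is estimated as 1,300,000 / 1300 = 1000 hands, so
    # indexes are dropped only if the db holds fewer than
    # 12 * 1000 + 500 = 12500 hands.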
    #Run import on updated files, then store latest update time. Called from GuiAutoImport.py
    def runUpdated(self):
        #Check for new files in monitored directories
        #todo: make efficient - always checks for new file, should be able to use mtime of directory
        # ^^ May not work on windows

        #rulog = open('runUpdated.txt', 'a')
        #rulog.writelines("runUpdated ... ")
        for site in self.dirlist:
            self.addImportDirectory(self.dirlist[site][0], False, site, self.dirlist[site][1])

        for file in self.filelist:
            if os.path.exists(file):
                stat_info = os.stat(file)
                #rulog.writelines("path exists ")
                if file in self.updatedsize: # we should be able to assume that if we're in size, we're in time as well
                    if stat_info.st_size > self.updatedsize[file] or stat_info.st_mtime > self.updatedtime[file]:
                        # print "file",file," updated", os.path.basename(file), stat_info.st_size, self.updatedsize[file], stat_info.st_mtime, self.updatedtime[file]
                        try:
                            if not os.path.isdir(file):
                                self.caller.addText("\n"+os.path.basename(file))
                        except KeyError: # TODO: What error happens here?
                            pass
                        (stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0]
                                                                       ,self.filelist[file][1], self.filelist[file][2], None)
                        self.logImport('auto', file, stored, duplicates, partial, errors, ttime, self.filelist[file][2])
                        try:
                            if not os.path.isdir(file): # Note: This assumes that whatever calls us has an "addText" func
                                self.caller.addText(" %d stored, %d duplicates, %d partial, %d errors (time = %f)" % (stored, duplicates, partial, errors, ttime))
                        except KeyError: # TODO: Again, what error happens here? fix when we find out ..
                            pass
                        self.updatedsize[file] = stat_info.st_size
                        self.updatedtime[file] = time()
                else:
                    if os.path.isdir(file) or (time() - stat_info.st_mtime) < 60:
                        self.updatedsize[file] = 0
                        self.updatedtime[file] = 0
                    else:
                        self.updatedsize[file] = stat_info.st_size
                        self.updatedtime[file] = time()
            else:
                self.removeFromFileList[file] = True

        self.addToDirList = filter(lambda x: self.addImportDirectory(x, True, self.addToDirList[x][0], self.addToDirList[x][1]), self.addToDirList)

        for file in self.removeFromFileList:
            if file in self.filelist:
                del self.filelist[file]

        self.addToDirList = {}
        self.removeFromFileList = {}
        self.database.rollback()
        #rulog.writelines(" finished\n")
        #rulog.close()
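
    # Note on the bookkeeping above: a file first seen while possibly still
    # being written (mtime within the last 60 seconds) is recorded with size 0,
    # so the next pass treats it as grown and imports it from the start; older
    # files are recorded at their current size and are only re-imported once
    # they grow or their mtime advances.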

    # This is now an internal function that should not be called directly.
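    # Pipeline for a single file: load the HHC filter module, parse the file
    # into hands, prepInsert/assembleHand each hand, batch-insert Hands
    # (counting FpdbHandDuplicate as duplicates), then HandsPlayers,
    # HandsActions and the HudCache, and finally pipe new Hands.id values to
    # the HUD when callHud is set.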
    def import_file_dict(self, file, site, filter, fileId, q=None):

        if os.path.isdir(file):
            self.addToDirList[file] = [site] + [filter]
            return (0,0,0,0,0)

        (stored, duplicates, partial, errors, ttime) = (0, 0, 0, 0, time())

        # Load filter, process file, pass returned filename to import_fpdb_file
        if self.settings['threads'] > 0 and self.writeq is not None:
            log.info((_("Converting %s") % file) + " (" + str(q.qsize()) + ")")
        else: log.info(_("Converting %s") % file)

        filter_name = filter.replace("ToFpdb", "")
        mod = __import__(filter)
        obj = getattr(mod, filter_name, None)
        if callable(obj):

            if file in self.pos_in_file: idx = self.pos_in_file[file]
            else: self.pos_in_file[file], idx = 0, 0

            hhc = obj( self.config, in_path = file, index = idx
                      ,starsArchive = self.settings['starsArchive']
                      ,ftpArchive = self.settings['ftpArchive']
                      ,sitename = site)

            if hhc.getStatus():
                if self.caller: hhc.progressNotify()
                handlist = hhc.getProcessedHands()
                self.pos_in_file[file] = hhc.getLastCharacterRead()
                (hbulk, hpbulk, habulk, hcbulk, phands, ihands, to_hud) = ([], [], [], [], [], [], [])
                sc, gsc = {'bk': []}, {'bk': []}

                ####Lock Placeholder####
                for hand in handlist:
                    hand.prepInsert(self.database, printtest = self.settings['testData'])
                    self.database.commit()
                    phands.append(hand)
                ####Lock Placeholder####

                for hand in phands:
                    hand.assembleHand()

                ####Lock Placeholder####
                id = self.database.nextHandId()
                for i in range(len(phands)):
                    doinsert = len(phands)==i+1
                    hand = phands[i]
                    try:
                        id = hand.getHandId(self.database, id)
                        sc, gsc = hand.updateSessionsCache(self.database, sc, gsc, None, doinsert)
                        hbulk = hand.insertHands(self.database, hbulk, fileId, doinsert, self.settings['testData'])
                        hcbulk = hand.updateHudCache(self.database, hcbulk, doinsert)
                        ihands.append(hand)
                        to_hud.append(hand.dbid_hands)
                    except Exceptions.FpdbHandDuplicate:
                        duplicates += 1
                self.database.commit()
                ####Lock Placeholder####

                for i in range(len(ihands)):
                    doinsert = len(ihands)==i+1
                    hand = ihands[i]
                    hpbulk = hand.insertHandsPlayers(self.database, hpbulk, doinsert, self.settings['testData'])
                    habulk = hand.insertHandsActions(self.database, habulk, doinsert, self.settings['testData'])
                self.database.commit()

                #pipe the Hands.id out to the HUD
                if self.callHud:
                    for hid in to_hud:
                        try:
                            print _("fpdb_import: sending hand to hud"), hid, "pipe =", self.caller.pipe_to_hud
                            self.caller.pipe_to_hud.stdin.write("%s" % (hid) + os.linesep)
                        except IOError, e:
                            log.error(_("Failed to send hand to HUD: %s") % e)

                errors = getattr(hhc, 'numErrors')
                stored = getattr(hhc, 'numHands')
                stored -= duplicates
                stored -= errors
                # Really ugly hack to allow testing Hands within the HHC from someone
                # with only an Importer object
                if self.settings['cacheHHC']:
                    self.handhistoryconverter = hhc
            else:
                # conversion didn't work
                # TODO: appropriate response?
                return (0, 0, 0, 1, time() - ttime)
        else:
            log.warning(_("Unknown filter filter_name:'%s' in filter:'%s'") %(filter_name, filter))
            return (0, 0, 0, 1, time() - ttime)

        ttime = time() - ttime

        #This will barf if conv.getStatus != True
        return (stored, duplicates, partial, errors, ttime)
    def printEmailErrorMessage(self, errors, filename, line):
        traceback.print_exc(file=sys.stderr)
        print (_("Error No.%s please send the hand causing this to fpdb-main@lists.sourceforge.net so we can fix the problem.") % errors)
        print _("Filename:"), filename
        print _("Here is the first line of the hand so you can identify it. Please mention that the error was a ValueError:")
        print self.hand[0]
        print _("Hand logged to hand-errors.txt")
        logfile = open('hand-errors.txt', 'a')
        for s in self.hand:
            logfile.write(str(s) + "\n")
        logfile.write("\n")
        logfile.close()


class ProgressBar:
    """
    Popup window to show progress

    Init method sets up total number of expected iterations
    If no parent is passed to init, command line mode is assumed
    and no progress bar is created
    """

    def __del__(self):

        if self.parent:
            self.progress.destroy()

    def progress_update(self, file):

        if not self.parent:
            #nothing to do
            return

        self.fraction += 1
        #update sum if fraction exceeds expected total number of iterations
        if self.fraction > self.sum:
            self.sum = self.fraction

        #progress bar total set to 1 plus the number of items, to prevent it
        #reaching 100% prior to processing fully completing

        progress_percent = float(self.fraction) / (float(self.sum) + 1.0)
        progress_text = (self.title + " "
                            + str(self.fraction) + " / " + str(self.sum))

        self.pbar.set_fraction(progress_percent)
        self.pbar.set_text(progress_text)

        now = datetime.datetime.now()
        now_formatted = now.strftime("%H:%M:%S")
        self.progresstext.set_text(now_formatted + " - "+self.title+ " " +file+"\n")

    def __init__(self, sum, parent):

        self.parent = parent
        if not self.parent:
            #no parent is passed, assume this is being run from the
            #command line, so return immediately
            return

        self.fraction = 0
        self.sum = sum
        self.title = _("Importing")

        self.progress = gtk.Window(gtk.WINDOW_TOPLEVEL)
        self.progress.set_size_request(500,150)

        self.progress.set_resizable(False)
        self.progress.set_modal(True)
        self.progress.set_transient_for(self.parent)
        self.progress.set_decorated(True)
        self.progress.set_deletable(False)
        self.progress.set_title(self.title)

        vbox = gtk.VBox(False, 5)
        vbox.set_border_width(10)
        self.progress.add(vbox)
        vbox.show()

        align = gtk.Alignment(0, 0, 0, 0)
        vbox.pack_start(align, False, True, 2)
        align.show()

        self.pbar = gtk.ProgressBar()
        align.add(self.pbar)
        self.pbar.show()

        align = gtk.Alignment(0, 0, 0, 0)
        vbox.pack_start(align, False, True, 0)
        align.show()

        self.progresstext = gtk.Label()
        self.progresstext.set_line_wrap(True)
        align.add(self.progresstext)
        self.progresstext.show()

        self.progress.show()


if __name__ == "__main__":
    print _("CLI for importing hands is GuiBulkImport.py")