Update bulk import, optionally drop indexes, improve gui

This commit is contained in:
Ray 2009-02-07 10:06:48 -05:00
parent 61259b6129
commit 18140defd8
2 changed files with 244 additions and 159 deletions

View File

@ -1,156 +1,212 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*-
#Copyright 2008 Steffen Jobbagy-Felso # Copyright 2008 Steffen Jobbagy-Felso
#This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by # it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License. # the Free Software Foundation, version 3 of the License.
# #
#This program is distributed in the hope that it will be useful, # This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details. # GNU General Public License for more details.
# #
#You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in # In the "official" distribution you can find the license in
#agpl-3.0.txt in the docs folder of the package. # agpl-3.0.txt in the docs folder of the package.
import threading # Standard Library modules
import fpdb_simple import os
import fpdb_import from time import time
# pyGTK modules
import pygtk import pygtk
pygtk.require('2.0') pygtk.require('2.0')
import gtk import gtk
import os #todo: remove this once import_dir is in fpdb_import
from time import time
class GuiBulkImport (threading.Thread): # fpdb/FreePokerTools modules
def import_dir(self): import fpdb_simple
"""imports a directory, non-recursive. todo: move this to fpdb_import so CLI can use it""" import fpdb_import
self.path=self.inputFile import fpdb_db
self.importer.addImportDirectory(self.path) import Configuration
self.importer.setCallHud(False)
starttime = time()
(stored, dups, partial, errs, ttime) = self.importer.runImport()
print "GuiBulkImport.import_dir done: Stored: %d Dupllicates: %d Partial: %d Errors: %d in %s seconds - %d/sec" %(stored, dups, partial, errs, ttime, (stored/ttime))
def load_clicked(self, widget, data=None):
self.inputFile=self.chooser.get_filename()
self.handCount=self.hand_count_tbuffer.get_text(self.hand_count_tbuffer.get_start_iter(), self.hand_count_tbuffer.get_end_iter())
if (self.handCount=="unlimited" or self.handCount=="Unlimited"):
self.importer.setHandCount(0)
else:
self.importer.setHandCount(int(self.handCount))
self.errorFile="failed.txt" class GuiBulkImport():
self.minPrint=self.min_print_tbuffer.get_text(self.min_print_tbuffer.get_start_iter(), self.min_print_tbuffer.get_end_iter())
if (self.minPrint=="never" or self.minPrint=="Never"):
self.importer.setMinPrint(0)
else:
self.importer.setMinPrint=int(self.minPrint)
self.quiet=self.info_tbuffer.get_text(self.info_tbuffer.get_start_iter(), self.info_tbuffer.get_end_iter())
if (self.quiet=="yes"):
self.importer.setQuiet(False)
else:
self.importer.setQuiet(True)
self.failOnError=self.fail_error_tbuffer.get_text(self.fail_error_tbuffer.get_start_iter(), self.fail_error_tbuffer.get_end_iter())
if (self.failOnError=="no"):
self.importer.setFailOnError(False)
else:
self.importer.setFailOnError(True)
if os.path.isdir(self.inputFile):
self.import_dir()
else:
self.importer.addImportFile(self.inputFile)
self.importer.setCallHud(False)
self.importer.runImport()
self.importer.clearFileList()
def get_vbox(self):
"""returns the vbox of this thread"""
return self.vbox
#end def get_vbox
def run (self):
print "todo: implement bulk import thread"
#end def run
def __init__(self, db, settings, config):
self.db=db
self.settings=settings
self.config=config
self.importer = fpdb_import.Importer(self,self.settings, config)
self.vbox=gtk.VBox(False,1)
self.vbox.show()
self.chooser = gtk.FileChooserWidget()
self.chooser.set_filename(self.settings['bulkImport-defaultPath'])
#chooser.set_default_response(gtk.RESPONSE_OK)
#self.filesel.ok_button.connect_object("clicked", gtk.Widget.destroy, self.filesel)
self.vbox.add(self.chooser)
self.chooser.show()
self.settings_hbox = gtk.HBox(False, 0)
self.vbox.pack_end(self.settings_hbox, False, True, 0)
self.settings_hbox.show()
self.hand_count_label = gtk.Label("Hands to import per file")
self.settings_hbox.add(self.hand_count_label)
self.hand_count_label.show()
self.hand_count_tbuffer=gtk.TextBuffer()
self.hand_count_tbuffer.set_text("unlimited")
self.hand_count_tview=gtk.TextView(self.hand_count_tbuffer)
self.settings_hbox.add(self.hand_count_tview)
self.hand_count_tview.show()
self.min_hands_label = gtk.Label("Status every")
self.settings_hbox.add(self.min_hands_label)
self.min_hands_label.show()
self.min_print_tbuffer=gtk.TextBuffer()
self.min_print_tbuffer.set_text("never")
self.min_print_tview=gtk.TextView(self.min_print_tbuffer)
self.settings_hbox.add(self.min_print_tview)
self.min_print_tview.show()
def import_dir(self):
self.toggles_hbox = gtk.HBox(False, 0) """imports a directory, non-recursive. todo: move this to fpdb_import so CLI can use it"""
self.vbox.pack_end(self.toggles_hbox, False, True, 0)
self.toggles_hbox.show()
self.info_label = gtk.Label("Print start/end info:") self.path = self.inputFile
self.toggles_hbox.add(self.info_label) self.importer.addImportDirectory(self.path)
self.info_label.show() self.importer.setCallHud(False)
starttime = time()
self.info_tbuffer=gtk.TextBuffer() (stored, dups, partial, errs, ttime) = self.importer.runImport()
self.info_tbuffer.set_text("yes") print 'GuiBulkImport.import_dir done: Stored: %d Duplicates: %d Partial: %d Errors: %d in %s seconds - %d/sec'\
self.info_tview=gtk.TextView(self.info_tbuffer) % (stored, dups, partial, errs, ttime, stored / ttime)
self.toggles_hbox.add(self.info_tview)
self.info_tview.show()
self.fail_error_label = gtk.Label("Fail on error:")
self.toggles_hbox.add(self.fail_error_label)
self.fail_error_label.show()
self.fail_error_tbuffer=gtk.TextBuffer()
self.fail_error_tbuffer.set_text("no")
self.fail_error_tview=gtk.TextView(self.fail_error_tbuffer)
self.toggles_hbox.add(self.fail_error_tview)
self.fail_error_tview.show()
self.load_button = gtk.Button("Import") #todo: rename variables to import too def load_clicked(self, widget, data=None):
self.load_button.connect("clicked", self.load_clicked, "Import clicked") # get the dir to import from the chooser
self.toggles_hbox.add(self.load_button) self.inputFile = self.chooser.get_filename()
self.load_button.show()
threading.Thread.__init__ ( self ) # get the import settings from the gui and save in the importer
print "initialised new bulk import thread (not actually a thread yet)" self.importer.setHandCount(int(self.spin_hands.get_text()))
#end class import_threaded self.importer.setMinPrint(int(self.spin_hands.get_text()))
self.importer.setQuiet(self.chk_st_st.get_active())
self.importer.setFailOnError(self.chk_fail.get_active())
self.importer.setThreads(int(self.spin_threads.get_text()))
self.importer.setHandsInDB(self.n_hands_in_db)
cb_model = self.cb.get_model()
cb_index = self.cb.get_active()
if cb_index:
self.importer.setDropIndexes(cb_model[cb_index][0])
else:
self.importer.setDropIndexes("auto")
self.lab_info.set_text("Importing")
if os.path.isdir(self.inputFile):
self.import_dir()
else:
self.importer.addImportFile(self.inputFile)
self.importer.setCallHud(False)
self.importer.runImport()
self.importer.clearFileList()
self.lab_info.set_text("Import finished")
def __init__(self, db, settings, config):
    """Build the bulk-import panel and look up how many hands are stored.

    db       -- an fpdb_db instance; it is connected, queried once and
                disconnected again inside this constructor
    settings -- settings mapping; 'bulkImport-defaultPath' is read here and
                the mapping is handed on to the Importer
    config   -- configuration object; supplies the 'fpdb' db parameters

    The finished widget tree is available as self.vbox.  The highest
    Hands.id found in the database is kept in self.n_hands_in_db.
    """
    self.db = db  # this is an instance of fpdb_db
    self.settings = settings
    self.config = config
    self.importer = fpdb_import.Importer(self, self.settings, config)

    self.vbox = gtk.VBox(False, 0)
    self.vbox.show()

    self.chooser = gtk.FileChooserWidget()
    self.chooser.set_filename(self.settings['bulkImport-defaultPath'])
    self.vbox.add(self.chooser)
    self.chooser.show()

    # Table widget to hold the settings
    self.table = gtk.Table(rows=3, columns=5, homogeneous=False)
    self.vbox.add(self.table)
    self.table.show()

    # checkbox - print start/stop?
    self.chk_st_st = gtk.CheckButton('Print Start/Stop Info')
    self.table.attach(self.chk_st_st, 0, 1, 0, 1,
                      xpadding=10, ypadding=0, yoptions=gtk.SHRINK)
    self.chk_st_st.show()
    self.chk_st_st.set_active(True)

    # label - status
    self.lab_status = gtk.Label("Hands/status print:")
    self.table.attach(self.lab_status, 1, 2, 0, 1,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.lab_status.show()
    self.lab_status.set_justify(gtk.JUSTIFY_RIGHT)

    # spin button - status
    # not sure what upper value should be!
    status_adj = gtk.Adjustment(value=100, lower=0, upper=300,
                                step_incr=10, page_incr=1, page_size=0)
    self.spin_status = gtk.SpinButton(adjustment=status_adj,
                                      climb_rate=0.0, digits=0)
    self.table.attach(self.spin_status, 2, 3, 0, 1,
                      xpadding=10, ypadding=0, yoptions=gtk.SHRINK)
    self.spin_status.show()

    # label - threads (greyed out together with its spin button)
    self.lab_threads = gtk.Label("Number of threads:")
    self.table.attach(self.lab_threads, 3, 4, 0, 1,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.lab_threads.show()
    self.lab_threads.set_sensitive(False)
    self.lab_threads.set_justify(gtk.JUSTIFY_RIGHT)

    # spin button - threads
    # not sure what upper value should be!
    threads_adj = gtk.Adjustment(value=0, lower=0, upper=10,
                                 step_incr=1, page_incr=1, page_size=0)
    self.spin_threads = gtk.SpinButton(adjustment=threads_adj,
                                       climb_rate=0.0, digits=0)
    self.table.attach(self.spin_threads, 4, 5, 0, 1,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.spin_threads.show()
    self.spin_threads.set_sensitive(False)

    # checkbox - fail on error?
    self.chk_fail = gtk.CheckButton('Fail on error')
    self.table.attach(self.chk_fail, 0, 1, 1, 2,
                      xpadding=10, ypadding=0, yoptions=gtk.SHRINK)
    self.chk_fail.show()

    # label - hands
    self.lab_hands = gtk.Label("Hands/file:")
    self.table.attach(self.lab_hands, 1, 2, 1, 2,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.lab_hands.show()
    self.lab_hands.set_justify(gtk.JUSTIFY_RIGHT)

    # spin button - hands to import
    # not sure what upper value should be!
    # NOTE(review): upper=10 caps the per-file hand count at 10; this looks
    # copied from the threads adjustment - confirm the intended maximum.
    hands_adj = gtk.Adjustment(value=0, lower=0, upper=10,
                               step_incr=1, page_incr=1, page_size=0)
    self.spin_hands = gtk.SpinButton(adjustment=hands_adj,
                                     climb_rate=0.0, digits=0)
    self.table.attach(self.spin_hands, 2, 3, 1, 2,
                      xpadding=10, ypadding=0, yoptions=gtk.SHRINK)
    self.spin_hands.show()

    # label - drop indexes
    self.lab_drop = gtk.Label("Drop indexes:")
    self.table.attach(self.lab_drop, 3, 4, 1, 2,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.lab_drop.show()
    self.lab_drop.set_justify(gtk.JUSTIFY_RIGHT)

    # ComboBox - drop indexes (entry order matters: index 2 is 'drop')
    self.cb = gtk.combo_box_new_text()
    self.cb.append_text('auto')
    self.cb.append_text("don't drop")
    self.cb.append_text('drop')
    self.cb.set_active(0)
    self.table.attach(self.cb, 4, 5, 1, 2,
                      xpadding=10, ypadding=0, yoptions=gtk.SHRINK)
    self.cb.show()

    # label - info
    self.lab_info = gtk.Label()
    self.table.attach(self.lab_info, 0, 4, 2, 3,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.lab_info.show()

    # button - Import
    self.load_button = gtk.Button('Import')  # todo: rename variables to import too
    self.load_button.connect('clicked', self.load_clicked, 'Import clicked')
    self.table.attach(self.load_button, 4, 5, 2, 3,
                      xpadding=0, ypadding=0, yoptions=gtk.SHRINK)
    self.load_button.show()

    # see how many hands are in the db and adjust accordingly
    db_parms = config.get_db_parameters('fpdb')
    db.connect(db_parms['db-backend'],
               db_parms['db-host'],
               db_parms['db-databaseName'],
               db_parms['db-user'],
               db_parms['db-password'])
    try:
        cursor = db.db.cursor()
        cursor.execute("Select max(id) from Hands;")
        row = cursor.fetchone()
    finally:
        # that's all we need this for; release the connection even when
        # the query fails
        db.disconnect()

    # max(id) is NULL (None) when the Hands table is empty, so normalise it
    # to 0; otherwise the "empty database" branch below can never trigger.
    if row and row[0] is not None:
        self.n_hands_in_db = row[0]
    else:
        self.n_hands_in_db = 0

    if self.n_hands_in_db == 0:
        # Empty database: force 'drop' and lock the choice.
        self.cb.set_active(2)
        self.cb.set_sensitive(False)
        self.lab_drop.set_sensitive(False)
if __name__ == '__main__':
    # Stand-alone launcher: show the bulk import panel in its own window.

    def destroy(*args):
        """Callback for the window's 'destroy' signal; ends the main event loop."""
        gtk.main_quit()

    config = Configuration.Config()
    db = fpdb_db.fpdb_db()

    # Start with the platform tag, then layer the configured parameters on top.
    settings = {'os': 'windows' if os.name == 'nt' else 'linuxmac'}
    settings.update(config.get_db_parameters('fpdb'))
    settings.update(config.get_tv_parameters())
    settings.update(config.get_import_parameters())
    settings.update(config.get_default_paths())

    bulk_import = GuiBulkImport(db, settings, config)

    main_window = gtk.Window()
    main_window.connect('destroy', destroy)
    main_window.add(bulk_import.vbox)
    main_window.show()
    gtk.main()

View File

@ -17,8 +17,24 @@
#see status.txt for site/games support info #see status.txt for site/games support info
import sys # Standard Library modules
import os # todo: remove this once import_dir is in fpdb_import
import sys
from time import time
import traceback
import math
import datetime
import re
# fpdb/FreePokerTools modules
import fpdb_simple
import fpdb_db
import fpdb_parse_logic
import Configuration
# database interface modules
try: try:
import MySQLdb import MySQLdb
mysqlLibFound=True mysqlLibFound=True
@ -31,16 +47,6 @@ try:
except: except:
pass pass
import traceback
import math
import os
import datetime
import re
import fpdb_db
import fpdb_simple
import fpdb_parse_logic
from time import time
class Importer: class Importer:
def __init__(self, caller, settings, config): def __init__(self, caller, settings, config):
@ -60,10 +66,8 @@ class Importer:
#Set defaults #Set defaults
self.callHud = self.config.get_import_parameters().get("callFpdbHud") self.callHud = self.config.get_import_parameters().get("callFpdbHud")
if 'minPrint' not in self.settings: if 'minPrint' not in self.settings:
#TODO: Is this value in the xml file?
self.settings['minPrint'] = 30 self.settings['minPrint'] = 30
if 'handCount' not in self.settings: if 'handCount' not in self.settings:
#TODO: Is this value in the xml file?
self.settings['handCount'] = 0 self.settings['handCount'] = 0
self.fdb = fpdb_db.fpdb_db() # sets self.fdb.db self.fdb.cursor and self.fdb.sql self.fdb = fpdb_db.fpdb_db() # sets self.fdb.db self.fdb.cursor and self.fdb.sql
self.fdb.do_connect(self.config) self.fdb.do_connect(self.config)
@ -84,6 +88,15 @@ class Importer:
def setFailOnError(self, value): def setFailOnError(self, value):
self.settings['failOnError'] = value self.settings['failOnError'] = value
def setHandsInDB(self, value):
    """Store value under the 'handsInDB' key of the importer settings."""
    settings = self.settings
    settings['handsInDB'] = value
def setThreads(self, value):
    """Store value under the 'threads' key of the importer settings."""
    settings = self.settings
    settings['threads'] = value
def setDropIndexes(self, value):
    """Store value under the 'dropIndexes' key of the importer settings."""
    settings = self.settings
    settings['dropIndexes'] = value
# def setWatchTime(self): # def setWatchTime(self):
# self.updated = time() # self.updated = time()
@ -92,7 +105,7 @@ class Importer:
#Add an individual file to filelist #Add an individual file to filelist
def addImportFile(self, filename, site = "default", filter = "passthrough"): def addImportFile(self, filename, site = "default", filter = "passthrough"):
#TODO: test it is a valid file #TODO: test it is a valid file -> put that in config!!
self.filelist[filename] = [site] + [filter] self.filelist[filename] = [site] + [filter]
#Add a directory of files to filelist #Add a directory of files to filelist
@ -110,14 +123,20 @@ class Importer:
else: else:
print "Warning: Attempted to add non-directory: '" + str(dir) + "' as an import directory" print "Warning: Attempted to add non-directory: '" + str(dir) + "' as an import directory"
#Run full import on filelist
def runImport(self): def runImport(self):
fpdb_simple.prepareBulkImport(self.fdb) """"Run full import on self.filelist."""
start = datetime.datetime.now()
print "started at", start, "--", len(self.filelist), "files to import.", self.settings['dropIndexes']
if self.settings['dropIndexes'] == 'auto':
self.settings['dropIndexes'] = self.calculate_auto()
if self.settings['dropIndexes'] == 'drop':
fpdb_simple.prepareBulkImport(self.fdb)
totstored = 0 totstored = 0
totdups = 0 totdups = 0
totpartial = 0 totpartial = 0
toterrors = 0 toterrors = 0
tottime = 0 tottime = 0
# if threads <= 1: do this bit
for file in self.filelist: for file in self.filelist:
(stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1]) (stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
totstored += stored totstored += stored
@ -125,9 +144,19 @@ class Importer:
totpartial += partial totpartial += partial
toterrors += errors toterrors += errors
tottime += ttime tottime += ttime
fpdb_simple.afterBulkImport(self.fdb) if self.settings['dropIndexes'] == 'drop':
fpdb_simple.afterBulkImport(self.fdb)
fpdb_simple.analyzeDB(self.fdb) fpdb_simple.analyzeDB(self.fdb)
return (totstored, totdups, totpartial, toterrors, tottime) return (totstored, totdups, totpartial, toterrors, tottime)
# else: import threaded
def calculate_auto(self):
    """A heuristic to determine a reasonable value of drop/don't drop.

    Reads len(self.filelist) and self.settings['handsInDB'] and returns
    either "drop" or "don't drop".  A missing or None hand count is
    treated as 0, i.e. as an empty database, instead of raising.
    """
    n_files = len(self.filelist)
    # None is what max(id) yields for an empty Hands table; a missing key
    # means the caller never supplied a count.
    hands_in_db = self.settings.get('handsInDB') or 0

    # The checks are order-dependent: the first one that matches wins.
    if n_files == 1:
        return "don't drop"
    if hands_in_db < 5000:
        return "drop"
    if n_files < 50:
        return "don't drop"
    if hands_in_db > 50000:
        return "don't drop"
    return "drop"
#Run import on updated files, then store latest update time. #Run import on updated files, then store latest update time.
def runUpdated(self): def runUpdated(self):