From 18140defd8bbacfb8dce88a8e1774237c0c8a9f3 Mon Sep 17 00:00:00 2001 From: Ray Date: Sat, 7 Feb 2009 10:06:48 -0500 Subject: [PATCH] Update bulk import, optionally drop indexes, improve gui --- pyfpdb/GuiBulkImport.py | 340 +++++++++++++++++++++++----------------- pyfpdb/fpdb_import.py | 63 ++++++-- 2 files changed, 244 insertions(+), 159 deletions(-) diff --git a/pyfpdb/GuiBulkImport.py b/pyfpdb/GuiBulkImport.py index 250bd313..6f06f232 100644 --- a/pyfpdb/GuiBulkImport.py +++ b/pyfpdb/GuiBulkImport.py @@ -1,156 +1,212 @@ #!/usr/bin/python +# -*- coding: utf-8 -*- -#Copyright 2008 Steffen Jobbagy-Felso -#This program is free software: you can redistribute it and/or modify -#it under the terms of the GNU Affero General Public License as published by -#the Free Software Foundation, version 3 of the License. +# Copyright 2008 Steffen Jobbagy-Felso +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. # -#This program is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#GNU General Public License for more details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # -#You should have received a copy of the GNU Affero General Public License -#along with this program. If not, see . -#In the "official" distribution you can find the license in -#agpl-3.0.txt in the docs folder of the package. +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# In the "official" distribution you can find the license in +# agpl-3.0.txt in the docs folder of the package. -import threading -import fpdb_simple -import fpdb_import +# Standard Library modules +import os +from time import time + +# pyGTK modules import pygtk pygtk.require('2.0') import gtk -import os #todo: remove this once import_dir is in fpdb_import -from time import time -class GuiBulkImport (threading.Thread): - def import_dir(self): - """imports a directory, non-recursive. todo: move this to fpdb_import so CLI can use it""" - self.path=self.inputFile - self.importer.addImportDirectory(self.path) - self.importer.setCallHud(False) - starttime = time() - (stored, dups, partial, errs, ttime) = self.importer.runImport() - print "GuiBulkImport.import_dir done: Stored: %d Dupllicates: %d Partial: %d Errors: %d in %s seconds - %d/sec" %(stored, dups, partial, errs, ttime, (stored/ttime)) - - def load_clicked(self, widget, data=None): - self.inputFile=self.chooser.get_filename() - - self.handCount=self.hand_count_tbuffer.get_text(self.hand_count_tbuffer.get_start_iter(), self.hand_count_tbuffer.get_end_iter()) - if (self.handCount=="unlimited" or self.handCount=="Unlimited"): - self.importer.setHandCount(0) - else: - self.importer.setHandCount(int(self.handCount)) +# fpdb/FreePokerTools modules +import fpdb_simple +import fpdb_import +import fpdb_db +import Configuration - self.errorFile="failed.txt" - - self.minPrint=self.min_print_tbuffer.get_text(self.min_print_tbuffer.get_start_iter(), self.min_print_tbuffer.get_end_iter()) - if (self.minPrint=="never" or self.minPrint=="Never"): - self.importer.setMinPrint(0) - else: - self.importer.setMinPrint=int(self.minPrint) - - self.quiet=self.info_tbuffer.get_text(self.info_tbuffer.get_start_iter(), self.info_tbuffer.get_end_iter()) - if (self.quiet=="yes"): - self.importer.setQuiet(False) - else: - self.importer.setQuiet(True) - - 
self.failOnError=self.fail_error_tbuffer.get_text(self.fail_error_tbuffer.get_start_iter(), self.fail_error_tbuffer.get_end_iter()) - if (self.failOnError=="no"): - self.importer.setFailOnError(False) - else: - self.importer.setFailOnError(True) - - if os.path.isdir(self.inputFile): - self.import_dir() - else: - self.importer.addImportFile(self.inputFile) - self.importer.setCallHud(False) - self.importer.runImport() - self.importer.clearFileList() - - def get_vbox(self): - """returns the vbox of this thread""" - return self.vbox - #end def get_vbox - - def run (self): - print "todo: implement bulk import thread" - #end def run - - def __init__(self, db, settings, config): - self.db=db - self.settings=settings - self.config=config - self.importer = fpdb_import.Importer(self,self.settings, config) - - self.vbox=gtk.VBox(False,1) - self.vbox.show() - - self.chooser = gtk.FileChooserWidget() - self.chooser.set_filename(self.settings['bulkImport-defaultPath']) - #chooser.set_default_response(gtk.RESPONSE_OK) - #self.filesel.ok_button.connect_object("clicked", gtk.Widget.destroy, self.filesel) - self.vbox.add(self.chooser) - self.chooser.show() - - - self.settings_hbox = gtk.HBox(False, 0) - self.vbox.pack_end(self.settings_hbox, False, True, 0) - self.settings_hbox.show() - - self.hand_count_label = gtk.Label("Hands to import per file") - self.settings_hbox.add(self.hand_count_label) - self.hand_count_label.show() - - self.hand_count_tbuffer=gtk.TextBuffer() - self.hand_count_tbuffer.set_text("unlimited") - self.hand_count_tview=gtk.TextView(self.hand_count_tbuffer) - self.settings_hbox.add(self.hand_count_tview) - self.hand_count_tview.show() - - self.min_hands_label = gtk.Label("Status every") - self.settings_hbox.add(self.min_hands_label) - self.min_hands_label.show() - - self.min_print_tbuffer=gtk.TextBuffer() - self.min_print_tbuffer.set_text("never") - self.min_print_tview=gtk.TextView(self.min_print_tbuffer) - self.settings_hbox.add(self.min_print_tview) - 
self.min_print_tview.show()
+class GuiBulkImport():
-

-		self.toggles_hbox = gtk.HBox(False, 0)
-		self.vbox.pack_end(self.toggles_hbox, False, True, 0)
-		self.toggles_hbox.show()
+    def import_dir(self):
+        """Import the directory held in self.inputFile (non-recursive) and print a summary. todo: move this to fpdb_import so CLI can use it"""

-		self.info_label = gtk.Label("Print start/end info:")
-		self.toggles_hbox.add(self.info_label)
-		self.info_label.show()
-
-		self.info_tbuffer=gtk.TextBuffer()
-		self.info_tbuffer.set_text("yes")
-		self.info_tview=gtk.TextView(self.info_tbuffer)
-		self.toggles_hbox.add(self.info_tview)
-		self.info_tview.show()
-
-		self.fail_error_label = gtk.Label("Fail on error:")
-		self.toggles_hbox.add(self.fail_error_label)
-		self.fail_error_label.show()
-
-		self.fail_error_tbuffer=gtk.TextBuffer()
-		self.fail_error_tbuffer.set_text("no")
-		self.fail_error_tview=gtk.TextView(self.fail_error_tbuffer)
-		self.toggles_hbox.add(self.fail_error_tview)
-		self.fail_error_tview.show()
+        self.path = self.inputFile
+        self.importer.addImportDirectory(self.path)
+        self.importer.setCallHud(False)
+        starttime = time()
+        (stored, dups, partial, errs, ttime) = self.importer.runImport()
+        print 'GuiBulkImport.import_dir done: Stored: %d Duplicates: %d Partial: %d Errors: %d in %s seconds - %d/sec'\
+             % (stored, dups, partial, errs, ttime, (stored / ttime if ttime else 0))  # ttime is 0 when nothing was imported

-		self.load_button = gtk.Button("Import") #todo: rename variables to import too
-		self.load_button.connect("clicked", self.load_clicked, "Import clicked")
-		self.toggles_hbox.add(self.load_button)
-		self.load_button.show()
+    def load_clicked(self, widget, data=None):  # 'clicked' handler connected to the Import button in __init__
+# get the dir to import from the chooser
+        self.inputFile = self.chooser.get_filename()

-		threading.Thread.__init__ ( self )
-		print "initialised new bulk import thread (not actually a thread yet)"
-#end class import_threaded
+# get the import settings from the gui and save in the importer
+        self.importer.setHandCount(int(self.spin_hands.get_text()))
+        self.importer.setMinPrint(int(self.spin_status.get_text()))  # status interval has its own spinner
+        self.importer.setQuiet(not self.chk_st_st.get_active())  # box ticked = print start/stop info = not quiet
+        self.importer.setFailOnError(self.chk_fail.get_active())
+        self.importer.setThreads(int(self.spin_threads.get_text()))
+        self.importer.setHandsInDB(self.n_hands_in_db)
+        cb_model = self.cb.get_model()
+        cb_index = self.cb.get_active()
+        if cb_index >= 0:  # get_active() returns -1 when no row is selected
+            self.importer.setDropIndexes(cb_model[cb_index][0])
+        else:
+            self.importer.setDropIndexes("auto")
+
+        self.lab_info.set_text("Importing")
+        if os.path.isdir(self.inputFile):
+            self.import_dir()
+        else:
+            self.importer.addImportFile(self.inputFile)
+            self.importer.setCallHud(False)
+            self.importer.runImport()
+            self.importer.clearFileList()
+
+        self.lab_info.set_text("Import finished")
+
+    def __init__(self, db, settings, config):  # builds self.vbox (file chooser + settings table) for embedding
+        self.db = db # this is an instance of fpdb_db
+        self.settings = settings
+        self.config = config
+        self.importer = fpdb_import.Importer(self, self.settings,
+                config)
+
+        self.vbox = gtk.VBox(False, 0)
+        self.vbox.show()
+
+        self.chooser = gtk.FileChooserWidget()
+        self.chooser.set_filename(self.settings['bulkImport-defaultPath'])
+        self.vbox.add(self.chooser)
+        self.chooser.show()
+
+# Table widget to hold the settings
+        self.table = gtk.Table(rows = 3, columns = 5, homogeneous = False)
+        self.vbox.add(self.table)
+        self.table.show()
+
+# checkbox - print start/stop?
+        self.chk_st_st = gtk.CheckButton('Print Start/Stop Info')
+        self.table.attach(self.chk_st_st, 0, 1, 0, 1, xpadding = 10, ypadding = 0, yoptions=gtk.SHRINK)
+        self.chk_st_st.show()
+        self.chk_st_st.set_active(True)
+
+# label - status
+        self.lab_status = gtk.Label("Hands/status print:")
+        self.table.attach(self.lab_status, 1, 2, 0, 1, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.lab_status.show()
+        self.lab_status.set_justify(gtk.JUSTIFY_RIGHT)
+
+# spin button - status
+        status_adj = gtk.Adjustment(value=100, lower=0, upper=300, step_incr=10, page_incr=1, page_size=0) #not sure what upper value should be!
+        self.spin_status = gtk.SpinButton(adjustment=status_adj, climb_rate=0.0, digits=0)
+        self.table.attach(self.spin_status, 2, 3, 0, 1, xpadding = 10, ypadding = 0, yoptions=gtk.SHRINK)
+        self.spin_status.show()
+
+# label - threads
+        self.lab_threads = gtk.Label("Number of threads:")
+        self.table.attach(self.lab_threads, 3, 4, 0, 1, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.lab_threads.show()
+        self.lab_threads.set_sensitive(False)
+        self.lab_threads.set_justify(gtk.JUSTIFY_RIGHT)
+
+# spin button - threads
+        threads_adj = gtk.Adjustment(value=0, lower=0, upper=10, step_incr=1, page_incr=1, page_size=0) #not sure what upper value should be!
+        self.spin_threads = gtk.SpinButton(adjustment=threads_adj, climb_rate=0.0, digits=0)
+        self.table.attach(self.spin_threads, 4, 5, 0, 1, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.spin_threads.show()
+        self.spin_threads.set_sensitive(False)
+
+# checkbox - fail on error?
+        self.chk_fail = gtk.CheckButton('Fail on error')
+        self.table.attach(self.chk_fail, 0, 1, 1, 2, xpadding = 10, ypadding = 0, yoptions=gtk.SHRINK)
+        self.chk_fail.show()
+
+# label - hands
+        self.lab_hands = gtk.Label("Hands/file:")
+        self.table.attach(self.lab_hands, 1, 2, 1, 2, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.lab_hands.show()
+        self.lab_hands.set_justify(gtk.JUSTIFY_RIGHT)
+
+# spin button - hands to import
+        hands_adj = gtk.Adjustment(value=0, lower=0, upper=10, step_incr=1, page_incr=1, page_size=0) #not sure what upper value should be!
+        self.spin_hands = gtk.SpinButton(adjustment=hands_adj, climb_rate=0.0, digits=0)
+        self.table.attach(self.spin_hands, 2, 3, 1, 2, xpadding = 10, ypadding = 0, yoptions=gtk.SHRINK)
+        self.spin_hands.show()
+
+# label - drop indexes
+        self.lab_drop = gtk.Label("Drop indexes:")
+        self.table.attach(self.lab_drop, 3, 4, 1, 2, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.lab_drop.show()
+        self.lab_drop.set_justify(gtk.JUSTIFY_RIGHT)
+
+# ComboBox - drop indexes
+        self.cb = gtk.combo_box_new_text()
+        self.cb.append_text('auto')
+        self.cb.append_text("don't drop")
+        self.cb.append_text('drop')
+        self.cb.set_active(0)
+        self.table.attach(self.cb, 4, 5, 1, 2, xpadding = 10, ypadding = 0, yoptions=gtk.SHRINK)
+        self.cb.show()
+
+# label - info
+        self.lab_info = gtk.Label()
+        self.table.attach(self.lab_info, 0, 4, 2, 3, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.lab_info.show()
+
+# button - Import
+        self.load_button = gtk.Button('Import') # todo: rename variables to import too
+        self.load_button.connect('clicked', self.load_clicked,
+                                 'Import clicked')
+        self.table.attach(self.load_button, 4, 5, 2, 3, xpadding = 0, ypadding = 0, yoptions=gtk.SHRINK)
+        self.load_button.show()
+
+# see how many hands are in the db and adjust accordingly
+        db_parms = config.get_db_parameters('fpdb')
+        db.connect(db_parms['db-backend'],
+                   db_parms['db-host'],
+                   db_parms['db-databaseName'],
+                   db_parms['db-user'],
+                   db_parms['db-password'])
+        cursor = db.db.cursor()
+        cursor.execute("Select max(id) from Hands;")
+        row = cursor.fetchone()
+        db.disconnect() # that's all we need this for
+        self.n_hands_in_db = row[0] or 0  # max(id) is NULL (None) on an empty Hands table
+        if self.n_hands_in_db == 0:
+            self.cb.set_active(2)
+            self.cb.set_sensitive(False)
+            self.lab_drop.set_sensitive(False)
+
+if __name__ == '__main__':
+
+
+    def destroy(*args): # call back for terminating the main eventloop
+        gtk.main_quit()
+
+    config = Configuration.Config()
+    db = fpdb_db.fpdb_db()
+
+    settings = {}
+    if os.name == 'nt': settings['os'] = 'windows'
+    else: settings['os'] = 'linuxmac'
+
+    settings.update(config.get_db_parameters('fpdb'))
+    settings.update(config.get_tv_parameters())
+    settings.update(config.get_import_parameters())
+    settings.update(config.get_default_paths())
+
+    i = GuiBulkImport(db, settings, config)
+    main_window = gtk.Window()
+    main_window.connect('destroy', destroy)
+    main_window.add(i.vbox)
+    main_window.show()
+    gtk.main()
diff --git a/pyfpdb/fpdb_import.py b/pyfpdb/fpdb_import.py
index aa68c94f..220abe3c 100644
--- a/pyfpdb/fpdb_import.py
+++ b/pyfpdb/fpdb_import.py
@@ -17,8 +17,24 @@

 #see status.txt for site/games support info

-import sys
+# Standard Library modules

+import os # todo: remove this once import_dir is in fpdb_import
+import sys
+from time import time
+import traceback
+import math
+import datetime
+import re
+
+# fpdb/FreePokerTools modules
+
+import fpdb_simple
+import fpdb_db
+import fpdb_parse_logic
+import Configuration
+
+# database interface modules
 try:
     import MySQLdb
     mysqlLibFound=True
@@ -31,16 +47,6 @@ try:
 except:
     pass

-import traceback
-import math
-import os
-import datetime
-import re
-import fpdb_db
-import fpdb_simple
-import fpdb_parse_logic
-from time import time
-
 class Importer:

     def __init__(self, caller, settings, config):
@@ -60,10 +66,8 @@ class Importer:
         #Set defaults
         self.callHud = self.config.get_import_parameters().get("callFpdbHud")
         if 'minPrint' not in
self.settings:
-            #TODO: Is this value in the xml file?
             self.settings['minPrint'] = 30
         if 'handCount' not in self.settings:
-            #TODO: Is this value in the xml file?
             self.settings['handCount'] = 0
         self.fdb = fpdb_db.fpdb_db() # sets self.fdb.db self.fdb.cursor and self.fdb.sql
         self.fdb.do_connect(self.config)
@@ -84,6 +88,15 @@ class Importer:
     def setFailOnError(self, value):
         self.settings['failOnError'] = value

+    def setHandsInDB(self, value):  # hand count consulted by calculate_auto()
+        self.settings['handsInDB'] = value
+
+    def setThreads(self, value):  # stored only; nothing visible in this patch reads it yet
+        self.settings['threads'] = value
+
+    def setDropIndexes(self, value):  # one of 'auto', 'drop', "don't drop"
+        self.settings['dropIndexes'] = value
+
 #   def setWatchTime(self):
 #       self.updated = time()

@@ -92,7 +105,7 @@ class Importer:

     #Add an individual file to filelist
     def addImportFile(self, filename, site = "default", filter = "passthrough"):
-        #TODO: test it is a valid file
+        #TODO: test it is a valid file -> put that in config!!
         self.filelist[filename] = [site] + [filter]

     #Add a directory of files to filelist
@@ -110,14 +123,20 @@ class Importer:
         else:
             print "Warning: Attempted to add non-directory: '" + str(dir) + "' as an import directory"

-    #Run full import on filelist
     def runImport(self):
-        fpdb_simple.prepareBulkImport(self.fdb)
+        """Run full import on self.filelist; returns the totals (stored, dups, partial, errors, time)."""
+        start = datetime.datetime.now()
+        print "started at", start, "--", len(self.filelist), "files to import.", self.settings.get('dropIndexes', 'auto')
+        if self.settings.get('dropIndexes', 'auto') == 'auto':  # callers that never call setDropIndexes() get 'auto'
+            self.settings['dropIndexes'] = self.calculate_auto()
+        if self.settings['dropIndexes'] == 'drop':
+            fpdb_simple.prepareBulkImport(self.fdb)
         totstored = 0
         totdups = 0
         totpartial = 0
         toterrors = 0
         tottime = 0
+#        if threads <= 1: do this bit
         for file in self.filelist:
             (stored, duplicates, partial, errors, ttime) = self.import_file_dict(file, self.filelist[file][0], self.filelist[file][1])
             totstored += stored
@@ -125,9 +144,19 @@ class Importer:
             totpartial += partial
             toterrors += errors
             tottime += ttime
-        fpdb_simple.afterBulkImport(self.fdb)
+        if self.settings['dropIndexes'] == 'drop':
+            fpdb_simple.afterBulkImport(self.fdb)
         fpdb_simple.analyzeDB(self.fdb)
         return (totstored, totdups, totpartial, toterrors, tottime)
+#        else: import threaded
+
+    def calculate_auto(self):
+        """A heuristic: return "drop" or "don't drop" from the number of queued files and settings['handsInDB']."""
+        if len(self.filelist) == 1: return "don't drop"
+        if (self.settings.get('handsInDB') or 0) < 5000:  return "drop"  # unset/None hand count is treated as 0
+        if len(self.filelist) < 50: return "don't drop"
+        if (self.settings.get('handsInDB') or 0) > 50000: return "don't drop"
+        return "drop"

     #Run import on updated files, then store latest update time.
     def runUpdated(self):