* Added encoding error handling and a worker ID, so that when several splitters are launched via threading the worker responsible for splitting a particular file can be identified

This commit is contained in:
Chaz 2010-11-24 15:13:26 -06:00
parent 92c8e68914
commit ead567c5cc

View File

@ -1,19 +1,22 @@
#!/usr/bin/python #!/usr/bin/env python
# -*- coding: utf-8 -*-
#Copyright 2010 Chaz Littlejohn # Copyright 2010, Chaz Littlejohn
#This program is free software: you can redistribute it and/or modify #
#it under the terms of the GNU Affero General Public License as published by # This program is free software; you can redistribute it and/or modify
#the Free Software Foundation, version 3 of the License. # it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# #
# This program is distributed in the hope that it will be useful, # This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. # GNU General Public License for more details.
# #
#You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program; if not, write to the Free Software
#In the "official" distribution you can find the license in agpl-3.0.txt. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
########################################################################
import L10n import L10n
_ = L10n.get_translation() _ = L10n.get_translation()
@ -28,6 +31,7 @@ import Options
import Configuration import Configuration
from Exceptions import * from Exceptions import *
from cStringIO import StringIO from cStringIO import StringIO
import time
(options, argv) = Options.fpdb_options() (options, argv) = Options.fpdb_options()
@ -37,7 +41,7 @@ codepage = ["utf-16", "utf-8", "cp1252"]
class SplitHandHistory: class SplitHandHistory:
def __init__(self, config, in_path = '-', out_path = None, hands = 100, filter = "PokerStarsToFpdb", archive = False): def __init__(self, config, in_path = '-', out_path = None, hands = 100, filter = "PokerStarsToFpdb", archive = False, workerid = 0):
self.config = config self.config = config
self.in_path = in_path self.in_path = in_path
self.out_path = out_path self.out_path = out_path
@ -50,22 +54,25 @@ class SplitHandHistory:
self.line_addendum = None self.line_addendum = None
self.filedone = False self.filedone = False
self.timestamp = str(time.time())
self.workerid = '%02d' % workerid
#Acquire re_SplitHands for this hh #Acquire re_SplitHands for this hh
filter_name = filter.replace("ToFpdb", "") self.filter_name = filter.replace("ToFpdb", "")
mod = __import__(filter) mod = __import__(filter)
obj = getattr(mod, filter_name, None) obj = getattr(mod, self.filter_name, None)
self.re_SplitHands = obj.re_SplitHands self.re_SplitHands = obj.re_SplitHands
#Determine line delimiter type if any #Determine line delimiter type if any
if self.re_SplitHands.match('\n\n'):
self.line_delimiter = '\n\n'
if self.re_SplitHands.match('\n\n\n'): if self.re_SplitHands.match('\n\n\n'):
self.line_delimiter = '\n\n\n' self.line_delimiter = '\n\n\n'
if self.re_SplitHands.match('\n\n'):
self.line_delimiter = '\n\n'
#Add new line addendum for sites which match SplitHand to next line as well #Add new line addendum for sites which match SplitHand to next line as well
if filter_name == 'OnGame': if self.filter_name == 'OnGame':
self.line_addendum = '*' self.line_addendum = '*'
if filter_name == 'Carbon': if self.filter_name == 'Carbon':
self.line_addendum = '<game' self.line_addendum = '<game'
#Open the gargantuan file #Open the gargantuan file
@ -75,6 +82,7 @@ class SplitHandHistory:
except IOError: except IOError:
print _('File not found') print _('File not found')
sys.exit(2) sys.exit(2)
self.kodec = kodec
#Split with do_hands_per_file if archive and paragraphs if a regular hh #Split with do_hands_per_file if archive and paragraphs if a regular hh
if self.archive: if self.archive:
@ -105,9 +113,10 @@ class SplitHandHistory:
print _('Nope, will not work (fileno=%d)' % fileno) print _('Nope, will not work (fileno=%d)' % fileno)
sys.exit(2) sys.exit(2)
basename = os.path.splitext(os.path.basename(self.in_path))[0] basename = os.path.splitext(os.path.basename(self.in_path))[0]
name = os.path.join(self.out_path, basename+'-%06d.txt' % fileno) name = os.path.join(self.out_path, self.filter_name+'-'+basename+'_'+self.workerid+'_'+self.timestamp+'_%06d.txt' % fileno)
print '-> %s' % name print '-> %s' % name
newfile = file(name, 'w') newfile = file(name, 'w')
os.chmod(name, 0775)
return newfile return newfile
#Archive Hand Splitter #Archive Hand Splitter
@ -122,8 +131,11 @@ class SplitHandHistory:
except FpdbEndOfFile: except FpdbEndOfFile:
done = True done = True
break break
except UnicodeEncodeError:
print _('Absurd character done messed you up')
sys.exit(2)
except: except:
print _("Unexpected error processing file") print _('Unexpected error processing file')
sys.exit(2) sys.exit(2)
n += 1 n += 1
outfile.close() outfile.close()
@ -174,7 +186,7 @@ class SplitHandHistory:
l = infile.readline() l = infile.readline()
l = l.replace('\r\n', '\n') l = l.replace('\r\n', '\n')
outfile.write(l) outfile.write(l)
l = infile.readline() l = infile.readline().encode(self.kodec)
while len(l) < 3: while len(l) < 3:
l = infile.readline() l = infile.readline()
@ -182,7 +194,7 @@ class SplitHandHistory:
while len(l) > 2: while len(l) > 2:
l = l.replace('\r\n', '\n') l = l.replace('\r\n', '\n')
outfile.write(l) outfile.write(l)
l = infile.readline() l = infile.readline().encode(self.kodec)
outfile.write(self.line_delimiter) outfile.write(self.line_delimiter)
return infile return infile
@ -196,12 +208,18 @@ def main(argv=None):
if argv is None: if argv is None:
argv = sys.argv[1:] argv = sys.argv[1:]
if not options.filename:
options.filename = sys.argv[1]
if not options.config: if not options.config:
options.config = Configuration.Config(file = "HUD_config.test.xml") options.config = sys.argv[2]
if sys.argv[3] == "True":
options.archive = True
if options.filename: if options.filename:
SplitHH = SplitHandHistory(options.config, options.filename, options.outpath, options.hands, SplitHH = SplitHandHistory(options.config, options.filename, options.outpath, options.hands,
options.hhc, options.archive) options.hhc, options.archive, options.workerid)
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())