ibus-typing-booster November 2013

ibus-typing-booster@lists.fedorahosted.org

2 participants
25 discussions

[ibus-typing-booster] miketmp-debug: add sys.stderr.write("mike ..." debug messages) (ee4c84d)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git On branch : miketmp-debug >--------------------------------------------------------------- commit ee4c84d0fa44404b8dfcf23a684984bf245eda17 Author: Mike FABIAN <mfabian(a)redhat.com> Date: Wed Jun 12 15:55:07 2013 +0200 add sys.stderr.write("mike ..." debug messages) >--------------------------------------------------------------- ibus-typing-booster/engine/factory.py | 1 + ibus-typing-booster/engine/hunspell_suggest.py | 3 +++ ibus-typing-booster/engine/hunspell_table.py | 19 +++++++++++++++++++ ibus-typing-booster/engine/tabsqlitedb.py | 19 +++++++++++++++++++ 4 files changed, 42 insertions(+), 0 deletions(-) diff --git a/ibus-typing-booster/engine/factory.py b/ibus-typing-booster/engine/factory.py index 61579ab..b474991 100755 --- a/ibus-typing-booster/engine/factory.py +++ b/ibus-typing-booster/engine/factory.py @@ -22,6 +22,7 @@ from gi.repository import IBus import hunspell_table import tabsqlitedb import os +import sys from re import compile as re_compile path_patt = re_compile(r'[^a-zA-Z0-9_/]') diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py index 6a24fea..2602f02 100755 --- a/ibus-typing-booster/engine/hunspell_suggest.py +++ b/ibus-typing-booster/engine/hunspell_suggest.py @@ -100,6 +100,7 @@ class Hunspell: self.pyhunspell_object = None def words_start(self,word): + sys.stderr.write("mike in words_start word=%s\n" %word.encode('UTF-8')) if type(word) != type(u''): word = word.decode('utf8') # http://pwet.fr/man/linux/fichiers_speciaux/hunspell says: @@ -143,6 +144,7 @@ class Hunspell: start_words.append(suggestion) else: start_words = [u'☹ %(loc)s%(dict_name)s not found.' 
%{'loc': self.loc, 'dict_name': self.dict_name}, u'☹ please install hunspell dictionary!'] +# sys.stderr.write("mike words=%(w)s\n" %{'w': list(set(start_words[0:max_words]))}) return list(set(start_words[0:max_words])) def suggest(self, input_phrase): @@ -155,6 +157,7 @@ class Hunspell: # to match words longer than that just wastes time. if len(input_phrase) > 40: return [] +# sys.stderr.write("mike in suggest word=%(ip)s\n" %{'ip': input_phrase.encode('UTF-8')}) return self.words_start(input_phrase) diff --git a/ibus-typing-booster/engine/hunspell_table.py b/ibus-typing-booster/engine/hunspell_table.py index d02fc36..f308632 100644 --- a/ibus-typing-booster/engine/hunspell_table.py +++ b/ibus-typing-booster/engine/hunspell_table.py @@ -22,6 +22,7 @@ __all__ = ( ) import os +import sys import string import unicodedata import curses.ascii @@ -85,6 +86,7 @@ class editor(object): '''Hold user inputs chars and preedit string''' def __init__ (self, config, database): + sys.stderr.write("mike editor __init__\n") self.db = database self._config = config self._name = self.db.ime_properties.get('name') @@ -141,6 +143,7 @@ class editor(object): self.trans_m17n_mode = True try: #self.trans = Translit.Transliterator.get(self._m17ndb, self._current_ime) + sys.stderr.write("mike calling Transliterator.get(%(m17n)s, %(cur)s)\n" %{'m17n': self._m17ndb, 'cur': self._current_ime}) self.trans = Transliterator.get(self._m17ndb, self._current_ime) except: import traceback @@ -168,12 +171,19 @@ class editor(object): 'NFKD', self._transliterated_string) else: self._transliterated_string = self._typed_string + sys.stderr.write("mike in update_transliterated_string() self._typed_string=%s\n" %self._typed_string.encode('UTF-8')) + sys.stderr.write("mike in update_transliterated_string() repr(self._typed_string)=%s\n" %repr(self._typed_string)) + sys.stderr.write("mike in update_transliterated_string() self._transliterated_string=%s\n" %self._transliterated_string.encode('UTF-8')) + 
sys.stderr.write("mike in update_transliterated_string() repr(self._transliterated_string)=%s\n" %repr(self._transliterated_string)) def get_transliterated_string(self): return self._transliterated_string def insert_string_at_cursor(self, string_to_insert): '''Insert typed string at cursor position''' + sys.stderr.write("mike in insert_string_at_cursor() string_to_insert=%s\n" %string_to_insert.encode('UTF-8')) + sys.stderr.write("mike in insert_string_at_cursor() self._typed_string=%s\n" %self._typed_string.encode('UTF-8')) + sys.stderr.write("mike in insert_string_at_cursor() self._typed_string_cursor=%s\n" %self._typed_string_cursor) self._typed_string = self._typed_string[:self._typed_string_cursor] \ +string_to_insert \ +self._typed_string[self._typed_string_cursor:] @@ -330,6 +340,7 @@ class editor(object): def update_candidates (self): '''Update lookuptable''' + sys.stderr.write("mike in update_candidates() self._typed_string=%s\n" %self._typed_string.encode('UTF-8')) if self._typed_string == self._typed_string_when_update_candidates_was_last_called: # The input did not change since we came here last, do nothing and leave # candidates and lookup table unchanged: @@ -499,6 +510,7 @@ class tabengine (IBus.Engine): '''The IM Engine for Tables''' def __init__ (self, bus, obj_path, db ): + sys.stderr.write("mike in tabengine __init__() obj_path=%s\n" %obj_path) super(tabengine,self).__init__ (connection=bus.get_connection(),object_path=obj_path) global debug_level try: @@ -562,6 +574,7 @@ class tabengine (IBus.Engine): def _change_mode (self): '''Shift input mode, TAB -> EN -> TAB ''' + sys.stderr.write("mike in hunspell_table.py _change_mode()\n") self.reset () self._update_ui () @@ -725,7 +738,10 @@ class tabengine (IBus.Engine): if self._has_input_purpose and self._input_purpose in [IBus.InputPurpose.PASSWORD, IBus.InputPurpose.PIN]: return False + sys.stderr.write("mike in process_key_event keyval=%(kv)s keycode=%(kc)s state=%(st)s\n" %{'kv': keyval, 'kc': 
keycode, 'st': state}) key = KeyEvent(keyval, state & IBus.ModifierType.RELEASE_MASK == 0, state) + sys.stderr.write("mike after KeyEvent() in process_key_event key.code=%(kc)s\n" %{'kc': key.code}) + sys.stderr.write("mike after KeyEvent() in process_key_event key.code=%(kc)s IBus.keyval_to_unicode(key.code)=%(uc)s\n" %{'kc': key.code, 'uc': IBus.keyval_to_unicode(key.code)}) # ignore NumLock mask key.mask &= ~IBus.ModifierType.MOD2_MASK @@ -746,6 +762,9 @@ class tabengine (IBus.Engine): return True if self._editor.is_empty (): + sys.stderr.write("mike self._editor.is_empty ():\n") + sys.stderr.write("mike key.code=%(key.code)s IBus.keyval_to_unicode(key.code)=%(keychar)s\n" %{'key.code': key.code, 'keychar': IBus.keyval_to_unicode(key.code)}) + sys.stderr.write("mike IBus.keyval_name(key.code)=%s\n" %IBus.keyval_name(key.code)) # This is the first character typed since the last commit # there is nothing in the preëdit yet. if key.code < 32: diff --git a/ibus-typing-booster/engine/tabsqlitedb.py b/ibus-typing-booster/engine/tabsqlitedb.py index 2f55a66..4864ddb 100755 --- a/ibus-typing-booster/engine/tabsqlitedb.py +++ b/ibus-typing-booster/engine/tabsqlitedb.py @@ -240,6 +240,8 @@ class tabsqlitedb: 'input_phrase': input_phrase, 'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase, 'timestamp': time.time()} + sys.stderr.write("mike update_phrase() sqlstr=%s\n" %sqlstr) + sys.stderr.write("mike update_phrase() sqlargs=%s\n" %sqlargs) try: self.db.execute(sqlstr, sqlargs) if commit: @@ -252,8 +254,10 @@ class tabsqlitedb: ''' Trigger a checkpoint operation. 
''' + sys.stderr.write("mike sync_userdb() commit and execute checkpoint ...\n") self.db.commit() self.db.execute('PRAGMA wal_checkpoint;') + sys.stderr.write("mike sync_userdb() commit and execute checkpoint done.\n") def create_tables (self, database): '''Create table for the phrases.''' @@ -268,6 +272,7 @@ class tabsqlitedb: ''' Add phrase to database ''' + sys.stderr.write("mike in add_phrase() input_phrase=%(ip)s phrase=%(p)s user_freq=%(uf)s database=%(db)s\n" %{'ip': input_phrase.encode('UTF-8'), 'p': phrase.encode('UTF-8'), 'uf': user_freq, 'db': database}) if not input_phrase or not phrase: return input_phrase = unicodedata.normalize( @@ -298,6 +303,8 @@ class tabsqlitedb: insert_sqlargs = {'input_phrase': input_phrase, 'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase, 'user_freq': user_freq, 'timestamp': time.time()} + sys.stderr.write("mike add_phrase() insert_sqlstr=%s\n" %insert_sqlstr) + sys.stderr.write("mike add_phrase() insert_sqlargs=%s\n" %insert_sqlargs) try: self.db.execute (insert_sqlstr, insert_sqlargs) if commit: @@ -370,8 +377,10 @@ class tabsqlitedb: self._normalization_form_internal, p_phrase) pp_phrase = unicodedata.normalize( self._normalization_form_internal, pp_phrase) + sys.stderr.write("mike in select_words() input_phrase=%(ip)s p_phrase=%(p)s pp_phrase=%(pp)s\n" %{'ip': input_phrase.encode('UTF-8'), 'p': p_phrase.encode('UTF-8'), 'pp': pp_phrase.encode('UTF-8')}) phrase_frequencies = {} map(lambda x: phrase_frequencies.update([(x, 0)]), self.hunspell_obj.suggest(input_phrase)) + sys.stderr.write("mike in select_words() hunspell: best_candidates=%s\n" %self.best_candidates(phrase_frequencies)) # Now phrase_frequencies might contain something like this: # # {u'code': 0, u'communicability': 0, u'cold': 0, u'colour': 0} @@ -437,6 +446,7 @@ class tabsqlitedb: # Updating the phrase_frequency dictionary with the normalized results gives: # {u'conspiracy': 6/11, u'code': 0, u'communicability': 0, u'cold': 1/11, u'colour': 
4/11} map(lambda x: phrase_frequencies.update([(x[0], x[1]/float(count))]), results_uni) + sys.stderr.write("mike in select_words() Unigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies)) if not p_phrase: # If no context for bigram matching is available, return what we have so far: return self.best_candidates(phrase_frequencies) @@ -460,6 +470,7 @@ class tabsqlitedb: # combination of the unigram and the bigram results, giving # both the weight of 0.5: map(lambda x: phrase_frequencies.update([(x[0], 0.5*x[1]/float(count_p_phrase)+0.5*phrase_frequencies[x[0]])]), results_bi) + sys.stderr.write("mike in select_words() Bigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies)) if not pp_phrase: # If no context for trigram matching is available, return what we have so far: return self.best_candidates(phrase_frequencies) @@ -485,6 +496,7 @@ class tabsqlitedb: # unigram + 0.25 * bigram + 0.5 * trigram, i.e. the trigrams # get higher weight): map(lambda x: phrase_frequencies.update([(x[0], 0.5*x[1]/float(count_pp_phrase_p_phrase)+0.5*phrase_frequencies[x[0]])]), results_tri) + sys.stderr.write("mike in select_words() Trigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies)) return self.best_candidates(phrase_frequencies) def generate_userdb_desc (self): @@ -590,6 +602,8 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_ input_phrase = unicodedata.normalize( self._normalization_form_internal, input_phrase) + sys.stderr.write("mike check_phrase() phrase=%(p)s, input_phrase=%(t)s, database=%(d)s\n" %{'p': phrase.encode('UTF-8'), 't': input_phrase.encode('UTF-8'), 'd': database}) + # There should never be more than 1 database row for the same # input_phrase *and* phrase. 
So the following query on # the database should match at most one database @@ -606,7 +620,10 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_ ;''' sqlargs = {'input_phrase': input_phrase, 'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase} + sys.stderr.write("mike sqlstr=%s\n" %sqlstr) + sys.stderr.write("mike sqlargs=%s\n" %sqlargs) result = self.db.execute(sqlstr, sqlargs).fetchall() + sys.stderr.write("mike result=%s\n" %result) if len(result) > 0: # A match was found in user_db, increase user frequency by 1 self.update_phrase(input_phrase = input_phrase, @@ -628,6 +645,8 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_ Or, if “input_phrase” is “None”, remove all rows matching “phrase” no matter for what input phrase from the database. ''' + sys.stderr.write("mike remove_phrase() phrase=%(p)s\n" %{'p': phrase.encode('UTF-8')}) + sys.stderr.write("mike remove_phrase() database=%s\n" %database) if not phrase: return phrase = unicodedata.normalize(

1 0

[ibus-typing-booster] miketmp-debug: WIP: single user database (fe75df8)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

1 0

[ibus-typing-booster] miketmp-debug: WIP timestamps (57769ba)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

1 0

[ibus-typing-booster] miketmp-debug: WIP: use multiple dictionaries (f17968e)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git On branch : miketmp-debug >--------------------------------------------------------------- commit f17968e455ae6edbcc97dfb5fac27f082bc8256c Author: Mike FABIAN <mfabian(a)redhat.com> Date: Fri Oct 25 15:14:26 2013 +0200 WIP: use multiple dictionaries >--------------------------------------------------------------- ibus-typing-booster/engine/hunspell_suggest.py | 157 ++++++++++++------------ ibus-typing-booster/engine/tabsqlitedb.py | 2 +- 2 files changed, 79 insertions(+), 80 deletions(-) diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py index 2602f02..1d97bb1 100755 --- a/ibus-typing-booster/engine/hunspell_suggest.py +++ b/ibus-typing-booster/engine/hunspell_suggest.py @@ -37,72 +37,85 @@ except: max_words = 100 max_words_row = 50 -class Hunspell: - def __init__(self,loc='/usr/share/myspell/',dict_name='en_US'): - self.normalization_form_internal = 'NFD' - self.loc = loc - self.dict_name = dict_name + '.dic' - self.aff_name = dict_name + '.aff' +normalization_form_internal = 'NFD' + +class Dictionary: + def __init__(self, name=u'en_US'): + self.loc = '/usr/share/myspell' + self.name = name self.encoding = 'UTF-8' - self.dict_buffer = None - self.aff_buffer = None + self.buffer = None + self.pyhunspell_object = None self.load_dictionary() def load_dictionary(self): - self.encoding = 'UTF-8' - self.dict_buffer = None - self.aff_buffer = None - self.pyhunspell_object = None print "load_dictionary() ..." - if not os.path.isfile(self.loc+self.dict_name) or not os.path.isfile(self.loc+self.aff_name): - print "load_dictionary(): .dic or .aff file missing." + dic_path = os.path.join(self.loc, self.name+'.dic') + aff_path = os.path.join(self.loc, self.name+'.aff') + if not os.path.isfile(dic_path) or not os.path.isfile(aff_path): + print("load_dictionary %(n)s: %(d)s %(a)s file missing." 
+ %{'n': self.name, 'd': dic_path, 'a': aff_path}) return try: - self.aff_buffer = open( - self.loc+self.aff_name).read().replace('\r\n', '\n') + aff_buffer = open(aff_path).read().replace('\r\n', '\n') except: import traceback traceback.print_exc() - if self.aff_buffer: + if aff_buffer: encoding_pattern = re.compile( r'^[\s]*SET[\s]+(?P<encoding>[-a-zA-Z0-9_]+)[\s]*$', re.MULTILINE|re.UNICODE) - match = encoding_pattern.search(self.aff_buffer) + match = encoding_pattern.search(aff_buffer) if match: self.encoding = match.group('encoding') print "load_dictionary(): encoding=%(enc)s found in %(aff)s" %{ - 'enc': self.encoding, 'aff': self.loc+self.aff_name} + 'enc': self.encoding, 'aff': aff_path} try: - self.dict_buffer = codecs.open( - self.loc+self.dict_name).read().decode(self.encoding).replace('\r\n', '\n') + self.buffer = codecs.open( + dic_path).read().decode(self.encoding).replace('\r\n', '\n') except: print "load_dictionary(): loading %(dic)s as %(enc)s encoding failed, fall back to ISO-8859-1." %{ - 'dic': self.loc+self.dict_name, 'enc': self.encoding} + 'dic': dic_path, 'enc': self.encoding} self.encoding = 'ISO-8859-1' try: - self.dict_buffer = codecs.open( - self.loc+self.dict_name).read().decode(self.encoding).replace('\r\n', '\n') + self.buffer = codecs.open( + dic_path).read().decode(self.encoding).replace('\r\n', '\n') except: - print "load_dictionary(): loading %(dic)s as %(enc)s encoding failed, giving up." %{ - 'dic': self.loc+self.dict_name, 'enc': self.encoding} - self.dict_buffer = None - self.aff_buffer = None + print("load_dictionary(): loading %(dic)s as %(enc)s encoding failed, giving up." 
%{ + 'dic': dic_path, 'enc': self.encoding}) + self.buffer = None import traceback traceback.print_exc() - if self.dict_buffer: - self.dict_buffer = unicodedata.normalize( - self.normalization_form_internal, self.dict_buffer) - if import_hunspell_successful: - self.pyhunspell_object = hunspell.HunSpell( - self.loc+self.dict_name, - self.loc+self.aff_name) - else: - self.pyhunspell_object = None + return + if self.buffer: + self.buffer = unicodedata.normalize( + normalization_form_internal, self.buffer) + if import_hunspell_successful: + self.pyhunspell_object = hunspell.HunSpell( + dic_path, aff_path) + else: + self.pyhunspell_object = None + +class Hunspell: + def __init__(self, dictionary_names=['en_US']): + self.dictionaries = [] + print("mike dictionary_names=%s\n" %dictionary_names) + for dictionary_name in dictionary_names: + self.dictionaries.append(Dictionary(name=dictionary_name)) - def words_start(self,word): - sys.stderr.write("mike in words_start word=%s\n" %word.encode('UTF-8')) - if type(word) != type(u''): - word = word.decode('utf8') + def suggest(self, input_phrase): + # If the input phrase is very long, don’t try looking + # something up in the hunspell dictionaries. The regexp match + # gets very slow if the input phrase is very long. And there + # are no very long words in the hunspell dictionaries anyway, + # the longest word in the German hunspell dictionary currently + # seems to be “Geschwindigkeitsübertretungsverfahren” trying + # to match words longer than that just wastes time. + if len(input_phrase) > 40: + return [] + if type(input_phrase) != type(u''): + input_phrase = input_phrase.decode('utf8') + sys.stderr.write("mike in suggest input_phrase=%(ip)s\n" %{'ip': input_phrase.encode('UTF-8')}) # http://pwet.fr/man/linux/fichiers_speciaux/hunspell says: # # > A dictionary file (*.dic) contains a list of words, one per @@ -114,51 +127,37 @@ class Hunspell: # I.e. 
if '/' is already contained in the input, it cannot # match a word in the dictionary and we return an empty list # immediately: - if '/' in word: + if '/' in input_phrase: return [] # And we should not match further than '/'. # Take care to use a non-greedy regexp to match only # one line and not accidentally big chunks of the file! try: - regexp = r'^'+re.escape(word)+r'.*?(?=/|$)' + regexp = r'^'+re.escape(input_phrase)+r'.*?(?=/|$)' patt_start = re.compile(regexp,re.MULTILINE|re.UNICODE) except: import traceback traceback.print_exc() - if self.dict_buffer != None: - start_words = patt_start.findall(self.dict_buffer) - if self.pyhunspell_object != None: - if len(word) >= 4: - # Always pass NFC to pyhunspell and convert the - # result back to NFKD, even for Korean (For - # Korean, hunspell does a NFC -> NFKD conversion - # of the input and NFKD->NFC conversion of the - # output) - word = unicodedata.normalize('NFC', word) - extra_suggestions = map( - lambda x: unicodedata.normalize( - self.normalization_form_internal, x.decode(self.encoding)), - self.pyhunspell_object.suggest(word.encode(self.encoding, 'replace'))) - for suggestion in extra_suggestions: - if suggestion not in start_words: - start_words.append(suggestion) - else: - start_words = [u'☹ %(loc)s%(dict_name)s not found.' %{'loc': self.loc, 'dict_name': self.dict_name}, u'☹ please install hunspell dictionary!'] -# sys.stderr.write("mike words=%(w)s\n" %{'w': list(set(start_words[0:max_words]))}) - return list(set(start_words[0:max_words])) - - def suggest(self, input_phrase): - # If the input phrase is very long, don’t try looking - # something up in the hunspell dictionaries. The regexp match - # gets very slow if the input phrase is very long. And there - # are no very long words in the hunspell dictionaries anyway, - # the longest word in the German hunspell dictionary currently - # seems to be “Geschwindigkeitsübertretungsverfahren” trying - # to match words longer than that just wastes time. 
- if len(input_phrase) > 40: - return [] -# sys.stderr.write("mike in suggest word=%(ip)s\n" %{'ip': input_phrase.encode('UTF-8')}) - return self.words_start(input_phrase) - - + suggested_words = [] + for dictionary in self.dictionaries: + if dictionary.buffer: + suggested_words += patt_start.findall(dictionary.buffer) + if dictionary.pyhunspell_object: + if len(input_phrase) >= 4: + # Always pass NFC to pyhunspell and convert the + # result back to the internal normalization form (NFD) + # (hunspell does the right thing for Korean if the input is NFC). + input_phrase = unicodedata.normalize('NFC', input_phrase) + extra_suggestions = map( + lambda x: unicodedata.normalize( + normalization_form_internal, x.decode(dictionary.encoding)), + dictionary.pyhunspell_object.suggest(input_phrase.encode(dictionary.encoding, 'replace'))) + for suggestion in extra_suggestions: + if suggestion not in suggested_words: + suggested_words.append(suggestion) + else: + dic_path = os.path.join(dictionary.loc, dictionary.name+'.dic') + suggested_words.insert( + 0, u'☹ %(dic_path)s not found. Please install hunspell dictionary!' %{'dic_path': dic_path}) + return suggested_words[0:max_words] diff --git a/ibus-typing-booster/engine/tabsqlitedb.py b/ibus-typing-booster/engine/tabsqlitedb.py index 17e0ee8..a156027 100755 --- a/ibus-typing-booster/engine/tabsqlitedb.py +++ b/ibus-typing-booster/engine/tabsqlitedb.py @@ -90,7 +90,7 @@ class tabsqlitedb: self._normalization_form_internal = 'NFD' self.hunspell_obj = hunspell_suggest.Hunspell( - dict_name=self.ime_properties.get("hunspell_dict").replace('.dic', '')) + dictionary_names=[self.ime_properties.get("hunspell_dict").replace('.dic', '')]) #user_db = self.ime_properties.get("name")+'-user.db' user_db = 'user.db'

1 0

[ibus-typing-booster] branch 'miketmp-debug' created
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git New branch : miketmp-debug Referencing: f17968e455ae6edbcc97dfb5fac27f082bc8256c

1 0

[ibus-typing-booster] branch 'miketmp-debug' deleted
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git Deleted branch: miketmp-debug

1 0

[ibus-typing-booster] branch '1.2.7' deleted
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git Deleted branch: 1.2.7

1 0

[ibus-typing-booster] master's head updated: Release 1.2.7 (c75d0d2)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git Branch 'master' now includes: b2c4916 Don’t strip characters with Unicode category “Cf” (Other, format) from tokens c75d0d2 Release 1.2.7

1 0

[ibus-typing-booster] 1.2.7: Release 1.2.7 (c75d0d2)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git On branch : 1.2.7 >--------------------------------------------------------------- commit c75d0d2427ec1faeeadd46900c04abaf6190578b Author: Mike FABIAN <mfabian(a)redhat.com> Date: Wed Nov 20 13:46:17 2013 +0100 Release 1.2.7 >--------------------------------------------------------------- ibus-typing-booster/ChangeLog | 4 ++++ ibus-typing-booster/configure.ac | 2 +- ibus-typing-booster/ibus-typing-booster.pc.in | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ibus-typing-booster/ChangeLog b/ibus-typing-booster/ChangeLog index 7bf8559..44769dc 100644 --- a/ibus-typing-booster/ChangeLog +++ b/ibus-typing-booster/ChangeLog @@ -1,4 +1,8 @@ * Author: Mike FABIAN <mfabian(a)redhat.com> +* 20 Nov 2013 Released 1.2.7 version +* Don’t strip characters with Unicode category “Cf” (Other, format) from tokens (Resolves: rhbz#1032504) + +* Author: Mike FABIAN <mfabian(a)redhat.com> * 14 Nov 2013 Released 1.2.6 version * Change wording of the option to show the total number of candidates (Resolves: rhbz#1029748) * Commit candidate clicked on with the mouse (Resolves: rhbz#1029822) diff --git a/ibus-typing-booster/configure.ac b/ibus-typing-booster/configure.ac index 7128759..c277102 100755 --- a/ibus-typing-booster/configure.ac +++ b/ibus-typing-booster/configure.ac @@ -24,7 +24,7 @@ m4_define([package_name], [ibus-typing-booster]) m4_define([ibus_released], [1]) m4_define([ibus_major_version], [1]) m4_define([ibus_minor_version], [2]) -m4_define([ibus_micro_version], [6]) +m4_define([ibus_micro_version], [7]) m4_define(ibus_maybe_datestamp, m4_esyscmd([if test x]ibus_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi])) diff --git a/ibus-typing-booster/ibus-typing-booster.pc.in b/ibus-typing-booster/ibus-typing-booster.pc.in index 3f7a3bb..6d6e317 100755 --- a/ibus-typing-booster/ibus-typing-booster.pc.in +++ b/ibus-typing-booster/ibus-typing-booster.pc.in @@ -11,7 +11,7 @@ 
enginedir=/usr/share/ibus/engine Name: IBus-Typing-Booster Description: Table Based Input Method Framework for Intelligent Input Bus for Linux / Unix OS -Version: 1.2.6 +Version: 1.2.7 Requires: Libs: Cflags: \ No newline at end of file

1 0

[ibus-typing-booster] 1.2.7: Don’t strip characters with Unicode category “Cf” (Other, format) from tokens (b2c4916)
by mfabian＠fedoraproject.org 20 Nov '13

20 Nov '13

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git On branch : 1.2.7 >--------------------------------------------------------------- commit b2c49163dd549fe5c600d4ff429ac3aca4e87612 Author: Mike FABIAN <mfabian(a)redhat.com> Date: Wed Nov 20 13:26:08 2013 +0100 Don’t strip characters with Unicode category “Cf” (Other, format) from tokens See: https://bugzilla.redhat.com/show_bug.cgi?id=1032504 (Bug 1032504 - [ml_IN][ibus-typing-booster][F20] - 200D does not get committed in user db when it comes at the end) With ml-inscript transliteration: Hfnd]mCd] → ഫില്‍സണ്‍ Hfnd]mCd → ഫില്‍സണ് mfabian@ari:~ $ echo -n ഫില്‍സണ്‍ | iconv -f utf-8 -t utf16be | od -t x1 echo -n ഫില്‍സണ്‍ | iconv -f utf-8 -t utf16be | od -t x1 0000000 0d 2b 0d 3f 0d 32 0d 4d 20 0d 0d 38 0d 23 0d 4d 0000020 20 0d 0000022 mfabian@ari:~ $ echo -n ഫില്‍സണ് | iconv -f utf-8 -t utf16be | od -t x1 echo -n ഫില്‍സണ് | iconv -f utf-8 -t utf16be | od -t x1 0000000 0d 2b 0d 3f 0d 32 0d 4d 20 0d 0d 38 0d 23 0d 4d 0000020 mfabian@ari:~ $ “]” inserts U+200D ZERO WIDTH JOINER. As the example shows, stripping this off from the tokens to be saved in the user database is a bad idea. >--------------------------------------------------------------- ibus-typing-booster/engine/itb_util.py | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ibus-typing-booster/engine/itb_util.py b/ibus-typing-booster/engine/itb_util.py index d017fd3..4940983 100755 --- a/ibus-typing-booster/engine/itb_util.py +++ b/ibus-typing-booster/engine/itb_util.py @@ -33,9 +33,9 @@ import unicodedata # prevents learning such words from user input. I.e. the list of # categories to trigger immediate commit should contain only categories # which are very unlikely to appear as parts of words. 
-categories_to_trigger_immediate_commit = ['Po', 'Pi', 'Pf', 'Ps', 'Pe', 'Pc', 'Sm', 'Sc', 'Cf'] +categories_to_trigger_immediate_commit = ['Po', 'Pi', 'Pf', 'Ps', 'Pe', 'Pc', 'Sm', 'Sc'] -categories_to_strip_from_tokens = ['Po', 'Pi', 'Pf', 'Ps', 'Pe', 'Pc', 'Pd', 'Sm', 'Sc', 'Cf'] +categories_to_strip_from_tokens = ['Po', 'Pi', 'Pf', 'Ps', 'Pe', 'Pc', 'Pd', 'Sm', 'Sc'] def lstrip_token(token): token = token.lstrip()

1 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

ibus-typing-booster November 2013