** [[OOobbs2/193]] [#od09a570]
-''サマリ'': 青空文庫の記号変換マクロ
-''環境'': Writer
-''状態'': 投稿
-''投稿者'': [[はにゃ?]]
-''投稿日'': 2009-11-02 (月) 02:31:17

*** 質問 [#q9c072f9]
ルビを変換。
#code(python){{
def rubytest():
    import re
    from unicodedata import name as u_name
    vbar = u'\uff5c' # vertical bar
    str_exp = u'%s([^%s]*)%s' % (u'\u300a', u'\u300b', u'\u300b') # parenthesis
    exp = re.compile(str_exp)
    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            founds = exp.finditer(txt)
            l_e = 0 # eliminated number of chars in the line
            if founds:
                for found in founds:
                    ruby = found.group(1)
                    n = 0
                    e = False
                    # find boundary of the ruby part
                    for s in reversed(found.string[0:found.start(1)-1]):
                        if s == vbar:
                            e = True
                        if u_name(s)[0:4] != 'CJK ':
                            break
                        n += 1
                    cursor = text.createTextCursorByRange(line.getStart())
                    # remove ruby text
                    rc = len(ruby) + 2
                    cursor.goRight(found.start(0) - l_e, False)
                    cursor.goRight(rc, True)
                    cursor.setString(u'')
                    cursor.collapseToStart()
                    # set ruby
                    cursor.goLeft(n, True)
                    cursor.RubyText = ruby
                    cursor.RubyAdjust = 4
                    # remove vbar
                    if e:
                        cursor.collapseToStart()
                        cursor.goLeft(1, True)
                        cursor.setString(u'')
                        l_e += 1
                    l_e += rc
}}

注釈のうち傍点のものを変換。
#code(python){{
def notetest_emphasis():
    from com.sun.star.text.FontEmphasis import DOT_ABOVE as FE_DOT_ABOVE
    import re
    #print(u'%4x' % ord(u']'))
    str_exp = u'%s%s%s([^%s]*)%sに傍点%s' % (
        u'\uff3b', u'\uff03', u'\u300c', u'\u300d', u'\u300d', u'\uff3d')
    exp = re.compile(str_exp)
    #print(str_exp)
    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            founds = exp.finditer(txt)
            if founds:
                l_e = 0
                for found in founds:
                    rc = len(found.group(0))
                    cursor = text.createTextCursorByRange(line.getStart())
                    cursor.goRight(found.start(0) - l_e, False)
                    cursor.goRight(rc, True)
                    cursor.setString(u'')
                    cursor.collapseToStart()
                    cursor.goLeft(len(found.group(1)), True)
                    cursor.CharEmphasis = FE_DOT_ABOVE
                    #cursor.CharBackColor = 16776960
                    l_e += rc
}}

注釈のうち字下げのものを変換。字下げ指定によってはうまくいかないかも。
#code(python){{
def addIndentedParaStyle(doc):
    base_name = u'Preformatted Text'
    indented_name = u'Indented %s' % base_name
    sfamilies = doc.getStyleFamilies()
    styles = sfamilies.getByName(u'ParagraphStyles')
    if not styles.hasByName(indented_name):
        style = doc.createInstance(u'com.sun.star.style.ParagraphStyle')
        styles.insertByName(indented_name, style)
        style.setParentStyle(base_name)
        # increment the indent
        style.ParaLeftMargin = style.ParaTabStops[0].Position
    return indented_name

def indenttest():
    import re
    start_exp = re.compile(u'%s%sここから2字下げ%s' % (
        u'\uff3b', u'\uff03', u'\uff3d'))
    end_exp = re.compile(u'%s%sここで字下げ終わり%s' % (
        u'\uff3b', u'\uff03', u'\uff3d'))
    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    indented_style = addIndentedParaStyle(doc)
    text = doc.getText()
    started = False
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        if started:
            if end_exp.match(para.getString()):
                para.setString(u'')
                started = False
            else:
                cursor = text.createTextCursorByRange(para.getStart())
                cursor.ParaStyleName = indented_style
        else: # not started
            if start_exp.match(para.getString()):
                para.setString(u'')
                started = True
}}

注釈の外字を置き換え。
#code(python){{
class mkt():
    def __init__(self, encode='jisx0213'):
        """jisx0213 is the same as eucjis2004"""
        import codecs
        self.codec = codecs.lookup(encode)
        self.mn = 0xa0

    def decode(self, mkt):
        parts = [int(c) + self.mn for c in mkt.split('-')]
        plen = len(parts)
        if plen == 3:
            if parts[0] == 0xa1:
                return self.codec.decode(
                    chr(parts[1]) + chr(parts[2]))[0]
            elif parts[0] == 0xa2:
                return self.codec.decode(
                    chr(0x8f) + chr(parts[1]) + chr(parts[2]))[0]
        return u''

def note_jisx0213():
    import re
    str_exp = re.compile(u'※%s%s([^%s]*)%s' % (
        u'\uff3b', u'\uff03', u'\uff3d', u'\uff3d'))
    code_exp = re.compile(u'[1-2]-\d{1,2}-\d{1,2}')
    mkt_coder = mkt()
    decoder = mkt_coder.decode
    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            founds = str_exp.finditer(txt)
            if founds:
                l_e = 0
                for found in founds:
                    code_found = code_exp.search(found.group(1))
                    if code_found:
                        c = decoder(code_found.group(0))
                        rc = len(found.group(0)) - 1# * is replaced by the charactor
                        cursor = text.createTextCursorByRange(line.getStart())
                        cursor.goRight(found.start(0) - l_e, False)
                        cursor.goRight(rc + 1, True)
                        cursor.setString(u'')
                        cursor.setString(c)
                        #cursor.CharBackColor = 16776960
                        l_e += rc
}}

実行する順序は
+ 外字置換
+ 傍点
+ ルビ

インデントは前後どちらでもよい。

テキストによっては注釈の書式があわないかもしれない。

*** 回答 [#hde047bb]

#comment

*** 感想,コメント,メモ [#y7ef35e3]

#comment
|