Top > OOobbs2 > 193

OOobbs2/193 Edit

  • サマリ: 青空文庫の記号変換マクロ
  • 環境: Writer
  • 状態: 投稿
  • 投稿者: はにゃ??
  • 投稿日: 2009-11-02 (月) 02:31:17

質問 Edit

ルビを変換。

  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
def rubytest():
    """Convert Aozora Bunko ruby markup in the current Writer document.

    Every ``《reading》`` annotation is removed from the text and applied
    as the ``RubyText`` property of the base text that precedes it.

    The base text is determined as follows:

    * if a fullwidth vertical bar (U+FF5C) occurs between the previous
      annotation (or the start of the element) and the opening bracket,
      the base is everything after the last such bar, and the bar itself
      is deleted;
    * otherwise the base is the run of characters directly before the
      opening bracket whose Unicode name starts with ``'CJK '``.

    Relies on the ``XSCRIPTCONTEXT`` global being available when the
    macro runs.  Returns nothing; the document is edited in place.
    """
    import re
    from unicodedata import name as u_name

    vbar = u'\uff5c' # vertical bar
    str_exp = u'%s([^%s]*)%s' % (u'\u300a', u'\u300b', u'\u300b') # parenthesis
    exp = re.compile(str_exp)

    def _base_length(segment):
        """Return (length of the ruby base at the end of segment, bar found)."""
        pos = segment.rfind(vbar)
        if pos >= 0:
            # An explicit marker delimits the base, whatever script it uses.
            return len(segment) - pos - 1, True
        count = 0
        for s in reversed(segment):
            # The default argument avoids ValueError on unnamed characters
            # (tabs, control characters, ...).
            if not u_name(s, u'').startswith('CJK '):
                break
            count += 1
        return count, False

    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()

    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            l_e = 0 # number of chars already deleted from this element
            prev_end = 0 # end of the previous annotation within txt

            # finditer() always returns an iterator, so no truth test is needed.
            for found in exp.finditer(txt):
                ruby = found.group(1)
                # Only look at text after the previous annotation: markers
                # before it were already consumed by earlier matches.
                n, e = _base_length(txt[prev_end:found.start(0)])
                prev_end = found.end(0)

                cursor = text.createTextCursorByRange(line.getStart())

                # remove ruby text including both brackets
                rc = len(ruby) + 2
                cursor.goRight(found.start(0) - l_e, False)
                cursor.goRight(rc, True)
                cursor.setString(u'')
                cursor.collapseToStart()

                # select the base text and attach the ruby to it
                cursor.goLeft(n, True)
                cursor.RubyText = ruby
                # NOTE(review): 4 is presumably RubyAdjust.INDENT_BLOCK -- confirm
                cursor.RubyAdjust = 4

                # remove vbar, which sits immediately before the base text
                if e:
                    cursor.collapseToStart()
                    cursor.goLeft(1, True)
                    cursor.setString(u'')
                    l_e += 1

                l_e += rc

注釈のうち傍点のものを変換。

  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
def notetest_emphasis():
    """Apply dot-above emphasis for Aozora Bunko emphasis annotations.

    For each annotation of the form ``［＃「X」に傍点］`` found in the
    current Writer document, the annotation is deleted and the
    ``len(X)`` characters immediately before it receive
    ``CharEmphasis = DOT_ABOVE``.  The preceding text is not compared
    with ``X``; only its length is used.

    Relies on the ``XSCRIPTCONTEXT`` global being available when the
    macro runs.  Returns nothing; the document is edited in place.
    """
    from com.sun.star.text.FontEmphasis import DOT_ABOVE as FE_DOT_ABOVE
    import re

    pattern_src = u'%s%s%s([^%s]*)%sに傍点%s' % (
        u'\uff3b', u'\uff03', u'\u300c', u'\u300d', u'\u300d', u'\uff3d')
    pattern = re.compile(pattern_src)

    document = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = document.getText()

    paragraphs = text.createEnumeration()
    while paragraphs.hasMoreElements():
        portions = paragraphs.nextElement().createEnumeration()
        while portions.hasMoreElements():
            portion = portions.nextElement()
            content = portion.getString()
            removed = 0  # chars already deleted from this element
            for hit in pattern.finditer(content):
                note_len = len(hit.group(0))
                target_len = len(hit.group(1))

                # Select and delete the whole annotation.
                cur = text.createTextCursorByRange(portion.getStart())
                cur.goRight(hit.start(0) - removed, False)
                cur.goRight(note_len, True)
                cur.setString(u'')
                cur.collapseToStart()

                # Emphasise the text that preceded the annotation.
                cur.goLeft(target_len, True)
                cur.CharEmphasis = FE_DOT_ABOVE

                removed += note_len

注釈のうち字下げのものを変換。字下げ指定によってはうまくいかないかも。

  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
def addIndentedParaStyle(doc):
    """Ensure the indented paragraph style exists and return its name.

    The style is named ``'Indented Preformatted Text'`` and is created in
    the document's ``ParagraphStyles`` family only when it is missing.
    A newly created style gets ``'Preformatted Text'`` as parent and a
    left margin equal to the position of its first tab stop.

    doc -- the document whose style families are queried and extended.
    """
    parent_name = u'Preformatted Text'
    style_name = u'Indented %s' % parent_name

    para_styles = doc.getStyleFamilies().getByName(u'ParagraphStyles')
    if para_styles.hasByName(style_name):
        # Already present: leave the existing definition untouched.
        return style_name

    new_style = doc.createInstance(u'com.sun.star.style.ParagraphStyle')
    para_styles.insertByName(style_name, new_style)
    new_style.setParentStyle(parent_name)
    # Indent by one tab stop.
    first_tab = new_style.ParaTabStops[0]
    new_style.ParaLeftMargin = first_tab.Position
    return style_name
 
 
def indenttest():
    """Convert Aozora Bunko block-indent annotations to a paragraph style.

    Paragraphs between one that starts with ``［＃ここから2字下げ］`` and
    one that starts with ``［＃ここで字下げ終わり］`` are given the style
    returned by ``addIndentedParaStyle``.  The two marker paragraphs have
    their text replaced by an empty string.

    Relies on the ``XSCRIPTCONTEXT`` global being available when the
    macro runs.  Returns nothing; the document is edited in place.
    """
    import re

    begin_marker = re.compile(u'%s%sここから2字下げ%s' % (
        u'\uff3b', u'\uff03', u'\uff3d'))
    finish_marker = re.compile(u'%s%sここで字下げ終わり%s' % (
        u'\uff3b', u'\uff03', u'\uff3d'))

    document = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    style_name = addIndentedParaStyle(document)
    text = document.getText()

    inside_block = False
    paragraphs = text.createEnumeration()
    while paragraphs.hasMoreElements():
        paragraph = paragraphs.nextElement()

        if not inside_block:
            # Outside a block only the opening marker is of interest.
            if begin_marker.match(paragraph.getString()):
                paragraph.setString(u'')
                inside_block = True
            continue

        if finish_marker.match(paragraph.getString()):
            paragraph.setString(u'')
            inside_block = False
            continue

        # Ordinary paragraph inside the block: apply the indented style.
        cur = text.createTextCursorByRange(paragraph.getStart())
        cur.ParaStyleName = style_name

注釈の外字を置き換え。

  0
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
class mkt():
    """Decoder for "plane-row-cell" codes such as ``'1-16-1'``.

    Each number is offset by 0xA0 and the resulting bytes are decoded
    with the configured codec.
    """

    def __init__(self, encode='jisx0213'):
        """Look up the codec used for decoding.

        encode -- codec name passed to ``codecs.lookup``.
        jisx0213 is the same as eucjis2004
        """
        import codecs
        self.codec = codecs.lookup(encode)
        self.mn = 0xa0  # offset added to every plane/row/cell number

    def decode(self, mkt):
        """Return the character for a ``'plane-row-cell'`` string.

        mkt -- three decimal numbers separated by ``'-'``.

        Returns ``u''`` when the string does not have exactly three
        parts or the plane is neither 1 nor 2.

        Raises ValueError when a part is not an integer or the offset
        value does not fit in a byte; decoding errors raised by the
        codec propagate unchanged.
        """
        parts = [int(c) + self.mn for c in mkt.split('-')]
        if len(parts) != 3:
            return u''
        plane, row, cell = parts
        if plane == 0xa1:
            raw = bytearray((row, cell))
        elif plane == 0xa2:
            # Plane 2 is selected with the 0x8F prefix byte.
            raw = bytearray((0x8f, row, cell))
        else:
            return u''
        # Build real bytes: chr() yields text on Python 3, which the
        # codec's decode() rejects with TypeError.
        return self.codec.decode(bytes(raw))[0]
 
 
def note_jisx0213():
    """Replace Aozora Bunko external-character notes with the character.

    Every ``※［＃...］`` note in the current Writer document whose body
    contains a code of the form ``plane-row-cell`` (plane 1 or 2) is
    replaced by the character that ``mkt().decode`` returns for that
    code.  Notes without such a code are left as they are.

    Relies on the ``XSCRIPTCONTEXT`` global being available when the
    macro runs.  Returns nothing; the document is edited in place.
    """
    import re
    str_exp = re.compile(u'※%s%s([^%s]*)%s' % (
        u'\uff3b', u'\uff03', u'\uff3d', u'\uff3d'))
    # Backslashes are doubled because '\d' in a non-raw literal is an
    # invalid escape sequence.
    code_exp = re.compile(u'[1-2]-\\d{1,2}-\\d{1,2}')
    mkt_coder = mkt()
    decoder = mkt_coder.decode

    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()

    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            l_e = 0 # net number of chars removed from this element so far

            # finditer() always returns an iterator, so no truth test is needed.
            for found in str_exp.finditer(txt):
                code_found = code_exp.search(found.group(1))
                if not code_found:
                    continue
                c = decoder(code_found.group(0))

                note_len = len(found.group(0))
                cursor = text.createTextCursorByRange(line.getStart())
                cursor.goRight(found.start(0) - l_e, False)
                cursor.goRight(note_len, True)
                cursor.setString(u'')
                cursor.setString(c)

                # The replacement is not always one character long, so
                # track the real difference to keep later offsets right.
                # NOTE(review): assumes cursor steps and len() count the
                # same units -- confirm for non-BMP characters.
                l_e += note_len - len(c)

実行する順序は

  1. 外字置換
  2. 傍点
  3. ルビ

インデントは前後どちらでもよい。

テキストによっては注釈の書式があわないかもしれない。

回答 Edit


感想,コメント,メモ Edit



Reload   New Lower page making Edit Freeze Diff Upload Copy Rename   Front page List of pages Search Recent changes Backup   Help   RSS of recent changes