|
- サマリ: 青空文庫の記号変換マクロ
- 環境: Writer
- 状態: 投稿
- 投稿者: はにゃ??
- 投稿日: 2009-11-02 (月) 02:31:17
質問 
ルビを変換。
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def rubytest():
    """Convert Aozora Bunko ruby annotations into Writer ruby text.

    For every annotation enclosed in U+300A / U+300B found in the current
    document, the annotation is deleted and its content is attached as
    ruby text to the run of characters directly in front of it whose
    Unicode names start with ``CJK ``. A fullwidth vertical line (U+FF5C)
    directly in front of that run is deleted as well.

    Annotations that have no such base run in front of them are left
    untouched, so that the reading is not lost.

    Expects ``XSCRIPTCONTEXT`` to exist as a global, as it does when this
    runs as an office Python macro.
    """
    import re
    from unicodedata import name as u_name

    vbar = u'\uff5c'  # fullwidth vertical line: explicit start of the base text
    # opening bracket, everything up to the closing bracket, closing bracket
    exp = re.compile(u'%s([^%s]*)%s' % (u'\u300a', u'\u300b', u'\u300b'))

    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            # Match offsets refer to the unmodified string, so keep track of
            # how many characters have already been deleted from it.
            l_e = 0
            for found in exp.finditer(txt):
                ruby = found.group(1)
                n = 0      # length of the base text
                e = False  # True when a vertical bar precedes the base text
                # Walk backwards from the opening bracket to find where the
                # base text begins.
                for s in reversed(txt[:found.start(0)]):
                    if s == vbar:
                        e = True
                        break
                    # name() raises ValueError for unnamed characters (tab,
                    # control characters) unless a default is supplied.
                    if not u_name(s, u'').startswith(u'CJK '):
                        break
                    n += 1
                if n == 0:
                    # Nothing to attach the ruby to: keep the annotation
                    # rather than deleting the reading.
                    continue
                cursor = text.createTextCursorByRange(line.getStart())
                # Remove the annotation: ruby text plus its two brackets.
                rc = len(ruby) + 2
                cursor.goRight(found.start(0) - l_e, False)
                cursor.goRight(rc, True)
                cursor.setString(u'')
                cursor.collapseToStart()
                # Select the base text and attach the ruby to it.
                cursor.goLeft(n, True)
                cursor.RubyText = ruby
                # NOTE(review): 4 is presumably RubyAdjust.INDENT_BLOCK - confirm.
                cursor.RubyAdjust = 4
                # Remove the vertical bar in front of the base text.
                if e:
                    cursor.collapseToStart()
                    cursor.goLeft(1, True)
                    cursor.setString(u'')
                    l_e += 1
                l_e += rc
|
注釈のうち傍点のものを変換。
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def notetest_emphasis():
    """Turn Aozora Bunko emphasis-dot notes into character emphasis.

    Each note matched by the pattern built below (a bracketed note that
    quotes a target string) is deleted from the current document, and
    ``CharEmphasis`` is set to ``DOT_ABOVE`` on as many characters in front
    of the note as the quoted target string is long.

    Expects ``XSCRIPTCONTEXT`` to exist as a global, as it does when this
    runs as an office Python macro.
    """
    import re
    from com.sun.star.text.FontEmphasis import DOT_ABOVE as FE_DOT_ABOVE

    note_open, sharp, note_close = u'\uff3b', u'\uff03', u'\uff3d'
    quote_open, quote_close = u'\u300c', u'\u300d'
    pattern = re.compile(u'%s%s%s([^%s]*)%sに傍点%s' % (
        note_open, sharp, quote_open, quote_close, quote_close, note_close))

    document = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    body = document.getText()
    paragraphs = body.createEnumeration()
    while paragraphs.hasMoreElements():
        portions = paragraphs.nextElement().createEnumeration()
        while portions.hasMoreElements():
            portion = portions.nextElement()
            # Match offsets refer to the unmodified string; `removed` counts
            # the characters deleted from it so far.
            removed = 0
            for hit in pattern.finditer(portion.getString()):
                note_len = hit.end(0) - hit.start(0)
                target_len = hit.end(1) - hit.start(1)
                cur = body.createTextCursorByRange(portion.getStart())
                # Select and delete the whole note.
                cur.goRight(hit.start(0) - removed, False)
                cur.goRight(note_len, True)
                cur.setString(u'')
                cur.collapseToStart()
                # Select the characters in front of the note and mark them.
                cur.goLeft(target_len, True)
                cur.CharEmphasis = FE_DOT_ABOVE
                removed += note_len
|
注釈のうち字下げのものを変換。「ここから2字下げ」の形式にのみ対応しており、それ以外の字下げ指定は変換されない。
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def addIndentedParaStyle(doc):
    """Make sure the indented paragraph style exists and return its name.

    The style is named ``Indented Preformatted Text``. When it is missing
    from the document's ``ParagraphStyles`` family it is created with
    ``Preformatted Text`` as its parent, and its left margin is set to the
    position of its first tab stop.

    doc -- the document whose paragraph styles are inspected and extended.

    Returns the name of the indented paragraph style.
    """
    parent_name = u'Preformatted Text'
    style_name = u'Indented %s' % parent_name
    para_styles = doc.getStyleFamilies().getByName(u'ParagraphStyles')
    if para_styles.hasByName(style_name):
        return style_name

    new_style = doc.createInstance(u'com.sun.star.style.ParagraphStyle')
    para_styles.insertByName(style_name, new_style)
    new_style.setParentStyle(parent_name)
    # Indent by the distance to the first tab stop.
    new_style.ParaLeftMargin = new_style.ParaTabStops[0].Position
    return style_name
def indenttest():
    """Apply the indented paragraph style between Aozora Bunko indent notes.

    Paragraphs that start with the opening note are emptied and switch
    indenting on; paragraphs that start with the closing note are emptied
    and switch it off. Every paragraph in between gets the style returned
    by ``addIndentedParaStyle``.

    Expects ``XSCRIPTCONTEXT`` to exist as a global, as it does when this
    runs as an office Python macro.
    """
    import re

    note_open, sharp, note_close = u'\uff3b', u'\uff03', u'\uff3d'
    opening = re.compile(u'%s%sここから2字下げ%s' % (
        note_open, sharp, note_close))
    closing = re.compile(u'%s%sここで字下げ終わり%s' % (
        note_open, sharp, note_close))

    document = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    style_name = addIndentedParaStyle(document)
    body = document.getText()
    inside = False  # True while between an opening and a closing note
    paragraphs = body.createEnumeration()
    while paragraphs.hasMoreElements():
        paragraph = paragraphs.nextElement()
        content = paragraph.getString()
        if not inside:
            if opening.match(content):
                paragraph.setString(u'')
                inside = True
            continue
        if closing.match(content):
            paragraph.setString(u'')
            inside = False
            continue
        cur = body.createTextCursorByRange(paragraph.getStart())
        cur.ParaStyleName = style_name
|
注釈の外字を置き換え。
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class mkt():
    """Decode "plane-row-cell" codes such as ``1-16-1`` into characters."""

    def __init__(self, encode='jisx0213'):
        """Look up the codec used for decoding.

        encode -- codec name handed to ``codecs.lookup``; jisx0213 is the
                  same as eucjis2004.
        """
        import codecs
        self.codec = codecs.lookup(encode)
        # Offset added to each number of the code to obtain the byte value.
        self.mn = 0xa0

    def decode(self, mkt):
        """Return the character for a ``plane-row-cell`` code string.

        mkt -- three decimal numbers joined by ``-``; the plane must be
               1 or 2.

        Returns the decoded text, or an empty string when the code does not
        have exactly three parts or the plane is neither 1 nor 2.
        Raises ValueError when a part is not an integer or is out of byte
        range, and whatever the codec raises for an unmapped byte sequence.
        """
        parts = [int(c) + self.mn for c in mkt.split('-')]
        if len(parts) != 3:
            return u''
        plane, row, cell = parts
        if plane == 0xa1:
            raw = bytearray((row, cell))
        elif plane == 0xa2:
            # Plane 2 is introduced by the 0x8f prefix byte.
            raw = bytearray((0x8f, row, cell))
        else:
            return u''
        # bytes(bytearray) is a byte string on Python 2 and 3 alike, whereas
        # chr() concatenation produces text on Python 3 and cannot be decoded.
        return self.codec.decode(bytes(raw))[0]
def note_jisx0213():
    """Replace Aozora Bunko gaiji notes with the character they describe.

    A note is the reference mark (U+203B) followed by a bracketed note
    (U+FF3B U+FF03 ... U+FF3D). When the note contains a code of the form
    ``plane-row-cell`` with plane 1 or 2, the whole note including the
    reference mark is replaced by the character decoded by ``mkt``.

    Notes without such a code, and notes whose code cannot be decoded, are
    left untouched.

    Expects ``XSCRIPTCONTEXT`` to exist as a global, as it does when this
    runs as an office Python macro.
    """
    import re

    str_exp = re.compile(u'※%s%s([^%s]*)%s' % (
        u'\uff3b', u'\uff03', u'\uff3d', u'\uff3d'))
    # Backslashes are doubled because the literal is not a raw string.
    code_exp = re.compile(u'[1-2]-\\d{1,2}-\\d{1,2}')
    decoder = mkt().decode

    doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
    text = doc.getText()
    e_para = text.createEnumeration()
    while e_para.hasMoreElements():
        para = e_para.nextElement()
        e_line = para.createEnumeration()
        while e_line.hasMoreElements():
            line = e_line.nextElement()
            txt = line.getString()
            # Match offsets refer to the unmodified string; l_e is how much
            # shorter the text has become through earlier replacements.
            l_e = 0
            for found in str_exp.finditer(txt):
                code_found = code_exp.search(found.group(1))
                if not code_found:
                    continue
                try:
                    c = decoder(code_found.group(0))
                except ValueError:
                    # Covers UnicodeDecodeError: the code has no mapping.
                    continue
                if not c:
                    continue
                note_len = len(found.group(0))
                cursor = text.createTextCursorByRange(line.getStart())
                cursor.goRight(found.start(0) - l_e, False)
                cursor.goRight(note_len, True)
                cursor.setString(u'')
                cursor.setString(c)
                # The replacement is not always a single character, so
                # account for its real length.
                l_e += note_len - len(c)
|
実行する順序は次のとおり。
- 外字置換
- 傍点
- ルビ
インデント（字下げ）の変換は、上記の処理の前後どちらで実行してもよい。
テキストによっては注釈の書式があわないかもしれない。
回答 
感想,コメント,メモ 
|