Top > OOobbs2 > 193
** [[OOobbs2/193]] [#od09a570]
-''サマリ'': 青空文庫の記号変換マクロ
-''環境'': Writer
-''状態'': 投稿
-''投稿者'': [[はにゃ?]]
-''投稿日'': 2009-11-02 (月) 02:31:17
*** 質問 [#q9c072f9]
ルビを変換。
#code(python){{
def rubytest():
	"""Convert Aozora Bunko ruby markup in the current Writer document.

	Each reading enclosed in U+300A ... U+300B is deleted from the text and
	attached as ruby text to the base text in front of it.  The base text is
	either everything after an explicit U+FF5C marker (the marker is removed
	as well) or, when there is no marker, the run of characters directly
	before the opening bracket whose Unicode name starts with 'CJK '.

	Matching is done per text portion, so a base that starts in an earlier
	portion than its reading is not detected.
	"""
	import re
	from unicodedata import name as u_name

	vbar = u'\uff5c' # fullwidth vertical bar: explicit start of a ruby base
	str_exp = u'%s([^%s]*)%s' % (u'\u300a', u'\u300b', u'\u300b') # parenthesis
	exp = re.compile(str_exp)

	def find_base(prefix):
		"""Return (length, has_vbar) of the ruby base that ends prefix."""
		pos = prefix.rfind(vbar)
		if pos != -1:
			# explicit marker: the base is everything after it, whatever
			# kind of characters it consists of
			return len(prefix) - pos - 1, True
		length = 0
		for s in reversed(prefix):
			# a default is passed because name() raises ValueError for
			# unnamed characters such as tab or line break
			if u_name(s, u'')[0:4] != 'CJK ':
				break
			length += 1
		return length, False

	doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
	text = doc.getText()

	e_para = text.createEnumeration()
	while e_para.hasMoreElements():
		para = e_para.nextElement()
		# the enumeration also yields tables, which cannot be enumerated
		# into text portions
		if not para.supportsService('com.sun.star.text.Paragraph'):
			continue
		e_line = para.createEnumeration()
		while e_line.hasMoreElements():
			line = e_line.nextElement()
			txt = line.getString()
			l_e = 0 # eliminated number of chars in the line
			prev_end = 0 # end of the previous match inside txt

			for found in exp.finditer(txt):
				ruby = found.group(1)
				# only look at the text after the previous reading so a
				# marker belonging to an earlier ruby is never reused
				n, e = find_base(txt[prev_end:found.start(0)])
				prev_end = found.end(0)
				cursor = text.createTextCursorByRange(line.getStart())

				# remove ruby text (reading plus the two brackets);
				# offsets from txt are shifted by what was already deleted
				rc = len(ruby) + 2
				cursor.goRight(found.start(0) - l_e, False)
				cursor.goRight(rc, True)
				cursor.setString(u'')
				cursor.collapseToStart()

				# set ruby on the n characters of base text
				cursor.goLeft(n, True)
				cursor.RubyText = ruby
				cursor.RubyAdjust = 4

				# remove vbar, which sits directly before the base
				if e:
					cursor.collapseToStart()
					cursor.goLeft(1, True)
					cursor.setString(u'')
					l_e += 1

				l_e += rc
}}

注釈のうち傍点のものを変換。

#code(python){{
def notetest_emphasis():
	"""Convert Aozora Bunko emphasis-dot notes in the current Writer document.

	A note consists of U+FF3B U+FF03, a quoted string in U+300C ... U+300D,
	the fixed suffix and U+FF3D.  The note is deleted and DOT_ABOVE emphasis
	is applied to as many characters directly before it as the quoted string
	is long.  The preceding text is not compared with the quoted string.
	"""
	from com.sun.star.text.FontEmphasis import DOT_ABOVE as FE_DOT_ABOVE
	import re
	str_exp = u'%s%s%s([^%s]*)%sに傍点%s' % (
		u'\uff3b', u'\uff03', u'\u300c', u'\u300d', u'\u300d', u'\uff3d')
	exp = re.compile(str_exp)
	doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
	text = doc.getText()

	e_para = text.createEnumeration()
	while e_para.hasMoreElements():
		para = e_para.nextElement()
		# the enumeration also yields tables, which cannot be enumerated
		# into text portions
		if not para.supportsService('com.sun.star.text.Paragraph'):
			continue
		e_line = para.createEnumeration()
		while e_line.hasMoreElements():
			line = e_line.nextElement()
			txt = line.getString()
			l_e = 0 # number of chars already deleted from this portion
			for found in exp.finditer(txt):
				# delete the whole note; offsets from txt are shifted by
				# what earlier iterations removed
				rc = len(found.group(0))
				cursor = text.createTextCursorByRange(line.getStart())
				cursor.goRight(found.start(0) - l_e, False)
				cursor.goRight(rc, True)
				cursor.setString(u'')
				cursor.collapseToStart()

				# select the text in front of the removed note and mark it
				cursor.goLeft(len(found.group(1)), True)
				cursor.CharEmphasis = FE_DOT_ABOVE

				l_e += rc
}}

注釈のうち字下げのものを変換。字下げ指定によってはうまくいかないかも。
#code(python){{
def addIndentedParaStyle(doc):
	"""Make sure the indented paragraph style exists and return its name.

	The style is named 'Indented Preformatted Text' and is created in the
	document's 'ParagraphStyles' family only when it is not there yet.  A
	newly created style gets 'Preformatted Text' as parent and its left
	margin is set to the position of its first tab stop.

	doc -- document providing getStyleFamilies() and createInstance()
	"""
	parent_name = u'Preformatted Text'
	style_name = u'Indented %s' % parent_name
	para_styles = doc.getStyleFamilies().getByName(u'ParagraphStyles')
	if para_styles.hasByName(style_name):
		return style_name

	new_style = doc.createInstance(u'com.sun.star.style.ParagraphStyle')
	para_styles.insertByName(style_name, new_style)
	new_style.setParentStyle(parent_name)
	# indent by one tab stop: the first tab position becomes the left margin
	first_tab = new_style.ParaTabStops[0]
	new_style.ParaLeftMargin = first_tab.Position
	return style_name


def indenttest():
	"""Convert Aozora Bunko two-character indent blocks in the current document.

	Paragraphs between a paragraph starting with the indent-start note and
	one starting with the indent-end note get the paragraph style returned
	by addIndentedParaStyle().  The two marker paragraphs are emptied (the
	empty paragraphs themselves stay in the document).
	"""
	import re
	# the width may be written with an ASCII "2" or with the fullwidth
	# digit U+FF12, which is the usual spelling in Aozora Bunko texts
	start_exp = re.compile(u'%s%sここから[2%s]字下げ%s' % (
		u'\uff3b', u'\uff03', u'\uff12', u'\uff3d'))
	end_exp = re.compile(u'%s%sここで字下げ終わり%s' % (
		u'\uff3b', u'\uff03', u'\uff3d'))

	doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
	indented_style = addIndentedParaStyle(doc)
	text = doc.getText()

	started = False # True while inside an indent block
	e_para = text.createEnumeration()
	while e_para.hasMoreElements():
		para = e_para.nextElement()
		# the enumeration also yields tables, which have no getString()
		if not para.supportsService('com.sun.star.text.Paragraph'):
			continue

		if started:
			if end_exp.match(para.getString()):
				para.setString(u'')
				started = False
			else:
				cursor = text.createTextCursorByRange(para.getStart())
				cursor.ParaStyleName = indented_style
		else:
			# not started
			if start_exp.match(para.getString()):
				para.setString(u'')
				started = True
}}

注釈の外字を置き換え。

#code(python){{
class mkt():
	"""Decoder for JIS X 0213 plane-row-cell (men-ku-ten) codes."""
	def __init__(self, encode='jisx0213'):
		"""Look up the codec used for decoding.

		encode -- codec name; jisx0213 is the same as eucjis2004
		"""
		import codecs
		self.codec = codecs.lookup(encode)
		# offset added to a row/cell number to get its EUC byte value
		self.mn = 0xa0
	def decode(self, mkt):
		"""Return the character for a code such as '1-16-1'.

		mkt -- 'plane-row-cell' string with decimal numbers

		Returns u'' when the string is not three numbers, the plane is
		neither 1 nor 2, a row/cell lies outside the EUC byte range or the
		codec has no character for the code.
		"""
		try:
			parts = [int(c) + self.mn for c in mkt.split('-')]
		except ValueError:
			# a part is not a number
			return u''
		if len(parts) != 3:
			return u''
		for b in parts[1:]:
			# EUC trail bytes are 0xa1..0xfe; anything else cannot be decoded
			if not 0xa1 <= b <= 0xfe:
				return u''
		# bytearray yields real bytes on Python 2 and Python 3 alike,
		# whereas chr() produces text on Python 3
		if parts[0] == 0xa1:
			data = bytearray(parts[1:])
		elif parts[0] == 0xa2:
			# plane 2 is introduced by the single shift byte 0x8f
			data = bytearray([0x8f]) + bytearray(parts[1:])
		else:
			return u''
		try:
			return self.codec.decode(bytes(data))[0]
		except UnicodeDecodeError:
			# unassigned code point
			return u''


def note_jisx0213():
	"""Replace Aozora Bunko gaiji notes by the real JIS X 0213 character.

	A gaiji is written as a reference mark followed by a note enclosed in
	U+FF3B U+FF03 ... U+FF3D.  When the note contains a plane-row-cell code
	(plane 1 or 2) that mkt.decode() can turn into a character, mark and
	note are replaced by that character.  Notes without such a code, or
	whose code cannot be decoded, are left untouched.
	"""
	import re
	str_exp = re.compile(u'※%s%s([^%s]*)%s' % (
		u'\uff3b', u'\uff03', u'\uff3d', u'\uff3d'))
	# backslashes are doubled so the literal holds no invalid escape sequence
	code_exp = re.compile(u'[1-2]-\\d{1,2}-\\d{1,2}')
	mkt_coder = mkt()
	decoder = mkt_coder.decode

	doc = XSCRIPTCONTEXT.getDesktop().getCurrentComponent()
	text = doc.getText()

	e_para = text.createEnumeration()
	while e_para.hasMoreElements():
		para = e_para.nextElement()
		# the enumeration also yields tables, which cannot be enumerated
		# into text portions
		if not para.supportsService('com.sun.star.text.Paragraph'):
			continue
		e_line = para.createEnumeration()
		while e_line.hasMoreElements():
			line = e_line.nextElement()
			txt = line.getString()
			l_e = 0 # number of chars this portion has shrunk by so far
			for found in str_exp.finditer(txt):
				code_found = code_exp.search(found.group(1))
				if not code_found:
					continue
				try:
					c = decoder(code_found.group(0))
				except (UnicodeError, ValueError):
					# code outside the codec's range
					c = u''
				if not c:
					# keep the note visible instead of deleting it
					continue

				rc = len(found.group(0)) - 1 # the mark is replaced by the character
				cursor = text.createTextCursorByRange(line.getStart())
				cursor.goRight(found.start(0) - l_e, False)
				cursor.goRight(rc + 1, True)
				cursor.setString(u'')
				cursor.setString(c)

				l_e += rc
}}

実行する順序は
+ 外字置換
+ 傍点
+ ルビ

インデントは前後どちらでもよい。

テキストによっては注釈の書式があわないかもしれない。
*** 回答 [#hde047bb]

#comment


*** 感想,コメント,メモ [#y7ef35e3]

#comment

Reload   New Lower page making Edit Freeze Diff Upload Copy Rename   Front page List of pages Search Recent changes Backup   Help   RSS of recent changes