Forked mumble-django project from https://bitbucket.org/Svedrin/mumble-django
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1443 lines
49 KiB

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # License: MIT (see LICENSE file provided)
  5. # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
  6. """
  7. **polib** allows you to manipulate, create, modify gettext files (pot, po
  8. and mo files). You can load existing files, iterate through their entries,
  9. add, modify entries, comments or metadata, etc... or create new po files
  10. from scratch.
  11. **polib** provides a simple and pythonic API, exporting only three
  12. convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
  13. four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
  14. new files/entries.
  15. **Basic example**:
  16. >>> import polib
  17. >>> # load an existing po file
  18. >>> po = polib.pofile('tests/test_utf8.po')
  19. >>> for entry in po:
  20. ... # do something with entry...
  21. ... pass
  22. >>> # add an entry
  23. >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
  24. >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
  25. >>> po.append(entry)
  26. >>> # to save our modified po file:
  27. >>> # po.save()
  28. >>> # or you may want to compile the po file
  29. >>> # po.save_as_mofile('tests/test_utf8.mo')
  30. * Copyright (C) 2010, David JEAN LOUIS <izimobil@gmail.com>
  31. *
  32. * Permission is hereby granted, free of charge, to any person obtaining a copy
  33. * of this software and associated documentation files (the "Software"), to deal
  34. * in the Software without restriction, including without limitation the rights
  35. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  36. * copies of the Software, and to permit persons to whom the Software is
  37. * furnished to do so, subject to the following conditions:
  38. *
  39. * The above copyright notice and this permission notice shall be included in
  40. * all copies or substantial portions of the Software.
  41. *
  42. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  43. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  44. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  45. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  46. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  47. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  48. * THE SOFTWARE.
  49. """
  50. __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
  51. __version__ = '0.4.3'
  52. __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
  53. 'detect_encoding', 'escape', 'unescape']
  54. import codecs
  55. import struct
  56. import textwrap
  57. import types
  58. default_encoding = 'utf-8'
  59. # function pofile() {{{
  60. def pofile(fpath, **kwargs):
  61. """
  62. Convenience function that parse the po/pot file *fpath* and return
  63. a POFile instance.
  64. **Keyword arguments**:
  65. - *fpath*: string, full or relative path to the po/pot file to parse
  66. - *wrapwidth*: integer, the wrap width, only useful when -w option was
  67. passed to xgettext (optional, default to 78)
  68. - *autodetect_encoding*: boolean, if set to False the function will
  69. not try to detect the po file encoding (optional, default to True)
  70. - *encoding*: string, an encoding, only relevant if autodetect_encoding
  71. is set to False
  72. **Example**:
  73. >>> import polib
  74. >>> po = polib.pofile('tests/test_weird_occurrences.po')
  75. >>> po #doctest: +ELLIPSIS
  76. <POFile instance at ...>
  77. >>> import os, tempfile
  78. >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
  79. ... orig_po = polib.pofile('tests/'+fname)
  80. ... tmpf = tempfile.NamedTemporaryFile().name
  81. ... orig_po.save(tmpf)
  82. ... try:
  83. ... new_po = polib.pofile(tmpf)
  84. ... for old, new in zip(orig_po, new_po):
  85. ... if old.msgid != new.msgid:
  86. ... old.msgid
  87. ... new.msgid
  88. ... if old.msgstr != new.msgstr:
  89. ... old.msgid
  90. ... new.msgid
  91. ... finally:
  92. ... os.unlink(tmpf)
  93. >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
  94. >>> tmpf = tempfile.NamedTemporaryFile().name
  95. >>> po_file.save_as_mofile(tmpf)
  96. >>> try:
  97. ... mo_file = polib.mofile(tmpf)
  98. ... for old, new in zip(po_file, mo_file):
  99. ... if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
  100. ... 'OLD: ', po_file._encode(old.msgid)
  101. ... 'NEW: ', mo_file._encode(new.msgid)
  102. ... if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
  103. ... 'OLD: ', po_file._encode(old.msgstr)
  104. ... 'NEW: ', mo_file._encode(new.msgstr)
  105. ... print new.msgstr
  106. ... finally:
  107. ... os.unlink(tmpf)
  108. """
  109. if kwargs.get('autodetect_encoding', True) == True:
  110. enc = detect_encoding(fpath)
  111. else:
  112. enc = kwargs.get('encoding', default_encoding)
  113. parser = _POFileParser(fpath, enc)
  114. instance = parser.parse()
  115. instance.wrapwidth = kwargs.get('wrapwidth', 78)
  116. return instance
  117. # }}}
  118. # function mofile() {{{
  119. def mofile(fpath, **kwargs):
  120. """
  121. Convenience function that parse the mo file *fpath* and return
  122. a MOFile instance.
  123. **Keyword arguments**:
  124. - *fpath*: string, full or relative path to the mo file to parse
  125. - *wrapwidth*: integer, the wrap width, only useful when -w option was
  126. passed to xgettext to generate the po file that was used to format
  127. the mo file (optional, default to 78)
  128. - *autodetect_encoding*: boolean, if set to False the function will
  129. not try to detect the po file encoding (optional, default to True)
  130. - *encoding*: string, an encoding, only relevant if autodetect_encoding
  131. is set to False
  132. **Example**:
  133. >>> import polib
  134. >>> mo = polib.mofile('tests/test_utf8.mo')
  135. >>> mo #doctest: +ELLIPSIS
  136. <MOFile instance at ...>
  137. >>> import os, tempfile
  138. >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
  139. ... orig_mo = polib.mofile('tests/'+fname)
  140. ... tmpf = tempfile.NamedTemporaryFile().name
  141. ... orig_mo.save(tmpf)
  142. ... try:
  143. ... new_mo = polib.mofile(tmpf)
  144. ... for old, new in zip(orig_mo, new_mo):
  145. ... if old.msgid != new.msgid:
  146. ... old.msgstr
  147. ... new.msgstr
  148. ... finally:
  149. ... os.unlink(tmpf)
  150. """
  151. if kwargs.get('autodetect_encoding', True) == True:
  152. enc = detect_encoding(fpath, True)
  153. else:
  154. enc = kwargs.get('encoding', default_encoding)
  155. parser = _MOFileParser(fpath, enc)
  156. instance = parser.parse()
  157. instance.wrapwidth = kwargs.get('wrapwidth', 78)
  158. return instance
  159. # }}}
  160. # function detect_encoding() {{{
  161. def detect_encoding(fpath, binary_mode=False):
  162. """
  163. Try to detect the encoding used by the file *fpath*. The function will
  164. return polib default *encoding* if it's unable to detect it.
  165. **Keyword argument**:
  166. - *fpath*: string, full or relative path to the mo file to parse.
  167. **Examples**:
  168. >>> print(detect_encoding('tests/test_noencoding.po'))
  169. utf-8
  170. >>> print(detect_encoding('tests/test_utf8.po'))
  171. UTF-8
  172. >>> print(detect_encoding('tests/test_utf8.mo', True))
  173. UTF-8
  174. >>> print(detect_encoding('tests/test_iso-8859-15.po'))
  175. ISO_8859-15
  176. >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
  177. ISO_8859-15
  178. """
  179. import re
  180. rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
  181. if binary_mode:
  182. mode = 'rb'
  183. else:
  184. mode = 'r'
  185. f = open(fpath, mode)
  186. for l in f.readlines():
  187. match = rx.search(l)
  188. if match:
  189. f.close()
  190. return match.group(1).strip()
  191. f.close()
  192. return default_encoding
  193. # }}}
  194. # function escape() {{{
  195. def escape(st):
  196. """
  197. Escape special chars and return the given string *st*.
  198. **Examples**:
  199. >>> escape('\\t and \\n and \\r and " and \\\\')
  200. '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
  201. """
  202. st = st.replace('\\', r'\\')
  203. st = st.replace('\t', r'\t')
  204. st = st.replace('\r', r'\r')
  205. st = st.replace('\n', r'\n')
  206. st = st.replace('\"', r'\"')
  207. return st
  208. # }}}
  209. # function unescape() {{{
  210. def unescape(st):
  211. """
  212. Unescape special chars and return the given string *st*.
  213. **Examples**:
  214. >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
  215. '\\t and \\n and \\r and " and \\\\'
  216. """
  217. st = st.replace(r'\"', '"')
  218. st = st.replace(r'\n', '\n')
  219. st = st.replace(r'\r', '\r')
  220. st = st.replace(r'\t', '\t')
  221. st = st.replace(r'\\', '\\')
  222. return st
  223. # }}}
  224. # class _BaseFile {{{
  225. class _BaseFile(list):
  226. """
  227. Common parent class for POFile and MOFile classes.
  228. This class must **not** be instanciated directly.
  229. """
  230. def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
  231. """
  232. Constructor.
  233. **Keyword arguments**:
  234. - *fpath*: string, path to po or mo file
  235. - *wrapwidth*: integer, the wrap width, only useful when -w option
  236. was passed to xgettext to generate the po file that was used to
  237. format the mo file, default to 78 (optional).
  238. """
  239. list.__init__(self)
  240. # the opened file handle
  241. self.fpath = fpath
  242. # the width at which lines should be wrapped
  243. self.wrapwidth = wrapwidth
  244. # the file encoding
  245. self.encoding = encoding
  246. # header
  247. self.header = ''
  248. # both po and mo files have metadata
  249. self.metadata = {}
  250. self.metadata_is_fuzzy = 0
  251. def __str__(self):
  252. """String representation of the file."""
  253. ret = []
  254. entries = [self.metadata_as_entry()] + \
  255. [e for e in self if not e.obsolete]
  256. for entry in entries:
  257. ret.append(entry.__str__(self.wrapwidth))
  258. for entry in self.obsolete_entries():
  259. ret.append(entry.__str__(self.wrapwidth))
  260. return '\n'.join(ret)
  261. def __repr__(self):
  262. """Return the official string representation of the object."""
  263. return '<%s instance at %x>' % (self.__class__.__name__, id(self))
  264. def metadata_as_entry(self):
  265. """Return the metadata as an entry"""
  266. e = POEntry(msgid='')
  267. mdata = self.ordered_metadata()
  268. if mdata:
  269. strs = []
  270. for name, value in mdata:
  271. # Strip whitespace off each line in a multi-line entry
  272. value = '\n'.join([v.strip() for v in value.split('\n')])
  273. strs.append('%s: %s' % (name, value))
  274. e.msgstr = '\n'.join(strs) + '\n'
  275. return e
  276. def save(self, fpath=None, repr_method='__str__'):
  277. """
  278. Save the po file to file *fpath* if no file handle exists for
  279. the object. If there's already an open file and no fpath is
  280. provided, then the existing file is rewritten with the modified
  281. data.
  282. **Keyword arguments**:
  283. - *fpath*: string, full or relative path to the file.
  284. - *repr_method*: string, the method to use for output.
  285. """
  286. if self.fpath is None and fpath is None:
  287. raise IOError('You must provide a file path to save() method')
  288. contents = getattr(self, repr_method)()
  289. if fpath is None:
  290. fpath = self.fpath
  291. if repr_method == 'to_binary':
  292. fhandle = open(fpath, 'wb')
  293. else:
  294. fhandle = codecs.open(fpath, 'w', self.encoding)
  295. fhandle.write(contents)
  296. fhandle.close()
  297. def find(self, st, by='msgid'):
  298. """
  299. Find entry which msgid (or property identified by the *by*
  300. attribute) matches the string *st*.
  301. **Keyword arguments**:
  302. - *st*: string, the string to search for
  303. - *by*: string, the comparison attribute
  304. **Examples**:
  305. >>> po = pofile('tests/test_utf8.po')
  306. >>> entry = po.find('Thursday')
  307. >>> entry.msgstr
  308. u'Jueves'
  309. >>> entry = po.find('Some unexistant msgid')
  310. >>> entry is None
  311. True
  312. >>> entry = po.find('Jueves', 'msgstr')
  313. >>> entry.msgid
  314. u'Thursday'
  315. """
  316. for e in self:
  317. if getattr(e, by) == st:
  318. return e
  319. return None
  320. def ordered_metadata(self):
  321. """
  322. Convenience method that return the metadata ordered. The return
  323. value is list of tuples (metadata name, metadata_value).
  324. """
  325. # copy the dict first
  326. metadata = self.metadata.copy()
  327. data_order = [
  328. 'Project-Id-Version',
  329. 'Report-Msgid-Bugs-To',
  330. 'POT-Creation-Date',
  331. 'PO-Revision-Date',
  332. 'Last-Translator',
  333. 'Language-Team',
  334. 'MIME-Version',
  335. 'Content-Type',
  336. 'Content-Transfer-Encoding'
  337. ]
  338. ordered_data = []
  339. for data in data_order:
  340. try:
  341. value = metadata.pop(data)
  342. ordered_data.append((data, value))
  343. except KeyError:
  344. pass
  345. # the rest of the metadata won't be ordered there are no specs for this
  346. keys = metadata.keys()
  347. list(keys).sort()
  348. for data in keys:
  349. value = metadata[data]
  350. ordered_data.append((data, value))
  351. return ordered_data
  352. def to_binary(self):
  353. """Return the mofile binary representation."""
  354. import array
  355. import struct
  356. import types
  357. offsets = []
  358. entries = self.translated_entries()
  359. # the keys are sorted in the .mo file
  360. def cmp(_self, other):
  361. if _self.msgid > other.msgid:
  362. return 1
  363. elif _self.msgid < other.msgid:
  364. return -1
  365. else:
  366. return 0
  367. # add metadata entry
  368. entries.sort(cmp)
  369. mentry = self.metadata_as_entry()
  370. mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
  371. entries = [mentry] + entries
  372. entries_len = len(entries)
  373. ids, strs = '', ''
  374. for e in entries:
  375. # For each string, we need size and file offset. Each string is
  376. # NUL terminated; the NUL does not count into the size.
  377. if e.msgid_plural:
  378. indexes = e.msgstr_plural.keys()
  379. indexes.sort()
  380. msgstr = []
  381. for index in indexes:
  382. msgstr.append(e.msgstr_plural[index])
  383. msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
  384. msgstr = self._encode('\0'.join(msgstr))
  385. else:
  386. msgid = self._encode(e.msgid)
  387. msgstr = self._encode(e.msgstr)
  388. offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
  389. ids += msgid + '\0'
  390. strs += msgstr + '\0'
  391. # The header is 7 32-bit unsigned integers.
  392. keystart = 7*4+16*entries_len
  393. # and the values start after the keys
  394. valuestart = keystart + len(ids)
  395. koffsets = []
  396. voffsets = []
  397. # The string table first has the list of keys, then the list of values.
  398. # Each entry has first the size of the string, then the file offset.
  399. for o1, l1, o2, l2 in offsets:
  400. koffsets += [l1, o1+keystart]
  401. voffsets += [l2, o2+valuestart]
  402. offsets = koffsets + voffsets
  403. output = struct.pack("IIIIIII",
  404. 0x950412de, # Magic number
  405. 0, # Version
  406. entries_len, # # of entries
  407. 7*4, # start of key index
  408. 7*4+entries_len*8, # start of value index
  409. 0, 0) # size and offset of hash table
  410. output += array.array("I", offsets).tostring()
  411. output += ids
  412. output += strs
  413. return output
  414. def _encode(self, mixed):
  415. """
  416. Encode the given argument with the file encoding if the type is unicode
  417. and return the encoded string.
  418. """
  419. if type(mixed) == types.UnicodeType:
  420. return mixed.encode(self.encoding)
  421. return mixed
  422. # }}}
  423. # class POFile {{{
  424. class POFile(_BaseFile):
  425. '''
  426. Po (or Pot) file reader/writer.
  427. POFile objects inherit the list objects methods.
  428. **Example**:
  429. >>> po = POFile()
  430. >>> entry1 = POEntry(
  431. ... msgid="Some english text",
  432. ... msgstr="Un texte en anglais"
  433. ... )
  434. >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
  435. >>> entry1.comment = "Some useful comment"
  436. >>> entry2 = POEntry(
  437. ... msgid="Peace in some languages",
  438. ... msgstr="Pace سلام שלום Hasîtî 和平"
  439. ... )
  440. >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
  441. >>> entry2.comment = "Another useful comment"
  442. >>> entry3 = POEntry(
  443. ... msgid='Some entry with quotes " \\"',
  444. ... msgstr='Un message unicode avec des quotes " \\"'
  445. ... )
  446. >>> entry3.comment = "Test string quoting"
  447. >>> po.append(entry1)
  448. >>> po.append(entry2)
  449. >>> po.append(entry3)
  450. >>> po.header = "Some Header"
  451. >>> print(po)
  452. # Some Header
  453. msgid ""
  454. msgstr ""
  455. <BLANKLINE>
  456. #. Some useful comment
  457. #: testfile:12 another_file:1
  458. msgid "Some english text"
  459. msgstr "Un texte en anglais"
  460. <BLANKLINE>
  461. #. Another useful comment
  462. #: testfile:15 another_file:5
  463. msgid "Peace in some languages"
  464. msgstr "Pace سلام שלום Hasîtî 和平"
  465. <BLANKLINE>
  466. #. Test string quoting
  467. msgid "Some entry with quotes \\" \\""
  468. msgstr "Un message unicode avec des quotes \\" \\""
  469. <BLANKLINE>
  470. '''
  471. def __str__(self):
  472. """Return the string representation of the po file"""
  473. ret, headers = '', self.header.split('\n')
  474. for header in headers:
  475. if header[:1] in [',', ':']:
  476. ret += '#%s\n' % header
  477. else:
  478. ret += '# %s\n' % header
  479. return ret + _BaseFile.__str__(self)
  480. def save_as_mofile(self, fpath):
  481. """
  482. Save the binary representation of the file to *fpath*.
  483. **Keyword arguments**:
  484. - *fpath*: string, full or relative path to the file.
  485. """
  486. _BaseFile.save(self, fpath, 'to_binary')
  487. def percent_translated(self):
  488. """
  489. Convenience method that return the percentage of translated
  490. messages.
  491. **Example**:
  492. >>> import polib
  493. >>> po = polib.pofile('tests/test_pofile_helpers.po')
  494. >>> po.percent_translated()
  495. 50
  496. >>> po = POFile()
  497. >>> po.percent_translated()
  498. 100
  499. """
  500. total = len([e for e in self if not e.obsolete])
  501. if total == 0:
  502. return 100
  503. translated = len(self.translated_entries())
  504. return int((100.00 / float(total)) * translated)
  505. def translated_entries(self):
  506. """
  507. Convenience method that return a list of translated entries.
  508. **Example**:
  509. >>> import polib
  510. >>> po = polib.pofile('tests/test_pofile_helpers.po')
  511. >>> len(po.translated_entries())
  512. 6
  513. """
  514. return [e for e in self if e.translated() and not e.obsolete]
  515. def untranslated_entries(self):
  516. """
  517. Convenience method that return a list of untranslated entries.
  518. **Example**:
  519. >>> import polib
  520. >>> po = polib.pofile('tests/test_pofile_helpers.po')
  521. >>> len(po.untranslated_entries())
  522. 6
  523. """
  524. return [e for e in self if not e.translated() and not e.obsolete]
  525. def fuzzy_entries(self):
  526. """
  527. Convenience method that return the list of 'fuzzy' entries.
  528. **Example**:
  529. >>> import polib
  530. >>> po = polib.pofile('tests/test_pofile_helpers.po')
  531. >>> len(po.fuzzy_entries())
  532. 2
  533. """
  534. return [e for e in self if 'fuzzy' in e.flags]
  535. def obsolete_entries(self):
  536. """
  537. Convenience method that return the list of obsolete entries.
  538. **Example**:
  539. >>> import polib
  540. >>> po = polib.pofile('tests/test_pofile_helpers.po')
  541. >>> len(po.obsolete_entries())
  542. 4
  543. """
  544. return [e for e in self if e.obsolete]
  545. def merge(self, refpot):
  546. """
  547. XXX this could not work if encodings are different, needs thinking
  548. and general refactoring of how polib handles encoding...
  549. Convenience method that merge the current pofile with the pot file
  550. provided. It behaves exactly as the gettext msgmerge utility:
  551. - comments of this file will be preserved, but extracted comments
  552. and occurrences will be discarded
  553. - any translations or comments in the file will be discarded,
  554. however dot comments and file positions will be preserved
  555. **Keyword argument**:
  556. - *refpot*: object POFile, the reference catalog.
  557. **Example**:
  558. >>> import polib
  559. >>> refpot = polib.pofile('tests/test_merge.pot')
  560. >>> po = polib.pofile('tests/test_merge_before.po')
  561. >>> po.merge(refpot)
  562. >>> expected_po = polib.pofile('tests/test_merge_after.po')
  563. >>> unicode(po) == unicode(expected_po)
  564. True
  565. """
  566. for entry in refpot:
  567. e = self.find(entry.msgid)
  568. if e is None:
  569. e = POEntry()
  570. self.append(e)
  571. e.merge(entry)
  572. # ok, now we must "obsolete" entries that are not in the refpot
  573. # anymore
  574. for entry in self:
  575. if refpot.find(entry.msgid) is None:
  576. entry.obsolete = True
  577. # }}}
  578. # class MOFile {{{
  579. class MOFile(_BaseFile):
  580. '''
  581. Mo file reader/writer.
  582. MOFile objects inherit the list objects methods.
  583. **Example**:
  584. >>> mo = MOFile()
  585. >>> entry1 = POEntry(
  586. ... msgid="Some english text",
  587. ... msgstr="Un texte en anglais"
  588. ... )
  589. >>> entry2 = POEntry(
  590. ... msgid="I need my dirty cheese",
  591. ... msgstr="Je veux mon sale fromage"
  592. ... )
  593. >>> entry3 = MOEntry(
  594. ... msgid='Some entry with quotes " \\"',
  595. ... msgstr='Un message unicode avec des quotes " \\"'
  596. ... )
  597. >>> mo.append(entry1)
  598. >>> mo.append(entry2)
  599. >>> mo.append(entry3)
  600. >>> print(mo)
  601. msgid ""
  602. msgstr ""
  603. <BLANKLINE>
  604. msgid "Some english text"
  605. msgstr "Un texte en anglais"
  606. <BLANKLINE>
  607. msgid "I need my dirty cheese"
  608. msgstr "Je veux mon sale fromage"
  609. <BLANKLINE>
  610. msgid "Some entry with quotes \\" \\""
  611. msgstr "Un message unicode avec des quotes \\" \\""
  612. <BLANKLINE>
  613. '''
  614. def __init__(self, *args, **kwargs):
  615. """
  616. MOFile constructor. Mo files have two other properties:
  617. - magic_number: the magic_number of the binary file,
  618. - version: the version of the mo spec.
  619. """
  620. _BaseFile.__init__(self, *args, **kwargs)
  621. self.magic_number = None
  622. self.version = 0
  623. def save_as_pofile(self, fpath):
  624. """
  625. Save the string representation of the file to *fpath*.
  626. **Keyword argument**:
  627. - *fpath*: string, full or relative path to the file.
  628. """
  629. _BaseFile.save(self, fpath)
  630. def save(self, fpath):
  631. """
  632. Save the binary representation of the file to *fpath*.
  633. **Keyword argument**:
  634. - *fpath*: string, full or relative path to the file.
  635. """
  636. _BaseFile.save(self, fpath, 'to_binary')
  637. def percent_translated(self):
  638. """
  639. Convenience method to keep the same interface with POFile instances.
  640. """
  641. return 100
  642. def translated_entries(self):
  643. """
  644. Convenience method to keep the same interface with POFile instances.
  645. """
  646. return self
  647. def untranslated_entries(self):
  648. """
  649. Convenience method to keep the same interface with POFile instances.
  650. """
  651. return []
  652. def fuzzy_entries(self):
  653. """
  654. Convenience method to keep the same interface with POFile instances.
  655. """
  656. return []
  657. def obsolete_entries(self):
  658. """
  659. Convenience method to keep the same interface with POFile instances.
  660. """
  661. return []
  662. # }}}
  663. # class _BaseEntry {{{
  664. class _BaseEntry(object):
  665. """
  666. Base class for POEntry or MOEntry objects.
  667. This class must *not* be instanciated directly.
  668. """
  669. def __init__(self, *args, **kwargs):
  670. """Base Entry constructor."""
  671. self.msgid = kwargs.get('msgid', '')
  672. self.msgstr = kwargs.get('msgstr', '')
  673. self.msgid_plural = kwargs.get('msgid_plural', '')
  674. self.msgstr_plural = kwargs.get('msgstr_plural', {})
  675. self.obsolete = kwargs.get('obsolete', False)
  676. self.encoding = kwargs.get('encoding', default_encoding)
  677. self.msgctxt = kwargs.get('msgctxt', None)
  678. def __repr__(self):
  679. """Return the official string representation of the object."""
  680. return '<%s instance at %x>' % (self.__class__.__name__, id(self))
  681. def __str__(self, wrapwidth=78):
  682. """
  683. Common string representation of the POEntry and MOEntry
  684. objects.
  685. """
  686. if self.obsolete:
  687. delflag = '#~ '
  688. else:
  689. delflag = ''
  690. ret = []
  691. # write the msgctxt if any
  692. if self.msgctxt is not None:
  693. ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
  694. # write the msgid
  695. ret += self._str_field("msgid", delflag, "", self.msgid)
  696. # write the msgid_plural if any
  697. if self.msgid_plural:
  698. ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
  699. if self.msgstr_plural:
  700. # write the msgstr_plural if any
  701. msgstrs = self.msgstr_plural
  702. keys = list(msgstrs)
  703. keys.sort()
  704. for index in keys:
  705. msgstr = msgstrs[index]
  706. plural_index = '[%s]' % index
  707. ret += self._str_field("msgstr", delflag, plural_index, msgstr)
  708. else:
  709. # otherwise write the msgstr
  710. ret += self._str_field("msgstr", delflag, "", self.msgstr)
  711. ret.append('')
  712. return '\n'.join(ret)
  713. def _str_field(self, fieldname, delflag, plural_index, field):
  714. lines = field.splitlines(True) # keep line breaks in strings
  715. # potentially, we could do line-wrapping here, but textwrap.wrap
  716. # treats whitespace too carelessly for us to use it.
  717. if len(lines) > 1:
  718. lines = ['']+lines # start with initial empty line
  719. else:
  720. lines = [field] # needed for the empty string case
  721. ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
  722. escape(lines.pop(0)))]
  723. for mstr in lines:
  724. ret.append('%s"%s"' % (delflag, escape(mstr)))
  725. return ret
  726. # }}}
  727. # class POEntry {{{
  728. class POEntry(_BaseEntry):
  729. """
  730. Represents a po file entry.
  731. **Examples**:
  732. >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
  733. >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
  734. >>> print(entry)
  735. #: welcome.py:12 anotherfile.py:34
  736. msgid "Welcome"
  737. msgstr "Bienvenue"
  738. <BLANKLINE>
  739. >>> entry = POEntry()
  740. >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
  741. >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
  742. >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
  743. >>> entry.flags.append('c-format')
  744. >>> entry.msgid = 'I have spam but no egg !'
  745. >>> entry.msgid_plural = 'I have spam and %d eggs !'
  746. >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
  747. >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
  748. >>> print(entry)
  749. #. A plural translation. This is a very very very long line please do not
  750. #. wrap, this is just for testing comment wrapping...
  751. # A plural translation. This is a very very very long line please do not wrap,
  752. # this is just for testing comment wrapping...
  753. #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
  754. #: src/eggs.c:45
  755. #, c-format
  756. msgid "I have spam but no egg !"
  757. msgid_plural "I have spam and %d eggs !"
  758. msgstr[0] "J'ai du jambon mais aucun oeuf !"
  759. msgstr[1] "J'ai du jambon et %d oeufs !"
  760. <BLANKLINE>
  761. """
  762. def __init__(self, *args, **kwargs):
  763. """POEntry constructor."""
  764. _BaseEntry.__init__(self, *args, **kwargs)
  765. self.comment = kwargs.get('comment', '')
  766. self.tcomment = kwargs.get('tcomment', '')
  767. self.occurrences = kwargs.get('occurrences', [])
  768. self.flags = kwargs.get('flags', [])
  769. def __str__(self, wrapwidth=78):
  770. """
  771. Return the string representation of the entry.
  772. """
  773. if self.obsolete:
  774. return _BaseEntry.__str__(self)
  775. ret = []
  776. # comment first, if any (with text wrapping as xgettext does)
  777. if self.comment != '':
  778. for comment in self.comment.split('\n'):
  779. if wrapwidth > 0 and len(comment) > wrapwidth-3:
  780. ret += textwrap.wrap(comment, wrapwidth,
  781. initial_indent='#. ',
  782. subsequent_indent='#. ',
  783. break_long_words=False)
  784. else:
  785. ret.append('#. %s' % comment)
  786. # translator comment, if any (with text wrapping as xgettext does)
  787. if self.tcomment != '':
  788. for tcomment in self.tcomment.split('\n'):
  789. if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
  790. ret += textwrap.wrap(tcomment, wrapwidth,
  791. initial_indent='# ',
  792. subsequent_indent='# ',
  793. break_long_words=False)
  794. else:
  795. ret.append('# %s' % tcomment)
  796. # occurrences (with text wrapping as xgettext does)
  797. if self.occurrences:
  798. filelist = []
  799. for fpath, lineno in self.occurrences:
  800. if lineno:
  801. filelist.append('%s:%s' % (fpath, lineno))
  802. else:
  803. filelist.append(fpath)
  804. filestr = ' '.join(filelist)
  805. if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
  806. # XXX textwrap split words that contain hyphen, this is not
  807. # what we want for filenames, so the dirty hack is to
  808. # temporally replace hyphens with a char that a file cannot
  809. # contain, like "*"
  810. lines = textwrap.wrap(filestr.replace('-', '*'),
  811. wrapwidth,
  812. initial_indent='#: ',
  813. subsequent_indent='#: ',
  814. break_long_words=False)
  815. # end of the replace hack
  816. for line in lines:
  817. ret.append(line.replace('*', '-'))
  818. else:
  819. ret.append('#: '+filestr)
  820. # flags
  821. if self.flags:
  822. flags = []
  823. for flag in self.flags:
  824. flags.append(flag)
  825. ret.append('#, %s' % ', '.join(flags))
  826. ret.append(_BaseEntry.__str__(self))
  827. return '\n'.join(ret)
  def __cmp__(self, other):
      '''
      Order two entries; called by comparison operations if rich
      comparison is not defined.

      Entries are ranked, in this order, by:

      1. obsolete status (an obsolete entry sorts before a live one),
      2. their occurrences, each list being sorted before the comparison,
      3. their msgid.

      Returns -1, 0 or 1; 0 is returned only when all three criteria are
      equal.

      **Tests**:

      >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
      >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
      >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
      >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
      >>> po = POFile()
      >>> po.append(a)
      >>> po.append(b)
      >>> po.append(c1)
      >>> po.append(c2)
      >>> po.sort()
      >>> print(po)
      #
      msgid ""
      msgstr ""
      <BLANKLINE>
      #: a.py:1 a.py:3
      msgid "c2"
      msgstr ""
      <BLANKLINE>
      #: a.py:1 b.py:1
      msgid "c1"
      msgstr ""
      <BLANKLINE>
      #: b.py:1 b.py:3
      msgid "a"
      msgstr ""
      <BLANKLINE>
      #: b.py:1 b.py:3
      msgid "b"
      msgstr ""
      <BLANKLINE>
      '''
      # First: obsolete test.
      if self.obsolete != other.obsolete:
          if self.obsolete:
              return -1
          return 1
      # sorted() returns new lists, so the entries' own occurrence lists
      # are left untouched. (file, line) tuples order naturally by file
      # name first and line second.
      occ1 = sorted(self.occurrences)
      occ2 = sorted(other.occurrences)
      # List comparison is lexicographic and treats a strict prefix as
      # smaller, which keeps the ordering symmetric.
      if occ1 != occ2:
          if occ1 > occ2:
              return 1
          return -1
      # Finally: compare message IDs, reporting equality as 0.
      if self.msgid != other.msgid:
          if self.msgid > other.msgid:
              return 1
          return -1
      return 0
  def translated(self):
      """
      Tell whether the entry counts as translated.

      An obsolete entry or one flagged ``fuzzy`` is never translated.
      Otherwise the entry is translated when its msgstr is not empty, or
      when it has plural translations and none of them is the empty
      string.
      """
      if self.obsolete or 'fuzzy' in self.flags:
          return False
      if self.msgstr != '':
          return True
      plurals = self.msgstr_plural
      if not plurals:
          return False
      # a single empty plural form makes the whole entry untranslated
      return not any(plurals[index] == '' for index in plurals)
  def merge(self, other):
      """
      Merge the current entry with the given pot entry.

      The msgid, occurrences, comment, flags and msgid_plural of *other*
      replace those of this entry. Existing plural translations are kept;
      any plural index present in *other* but missing here is added with
      an empty translation.

      The occurrences and flags lists are copied rather than shared, so
      that changing them on this entry afterwards (for instance marking it
      fuzzy) does not modify *other*.
      """
      self.msgid = other.msgid
      self.occurrences = list(other.occurrences)
      self.comment = other.comment
      self.flags = list(other.flags)
      self.msgid_plural = other.msgid_plural
      if other.msgstr_plural:
          for pos in other.msgstr_plural:
              # keep existing translation at pos if any
              self.msgstr_plural.setdefault(pos, '')
  936. # }}}
  937. # class MOEntry {{{
  class MOEntry(_BaseEntry):
      """
      A single entry of a mo file.

      **Examples**:

      >>> entry = MOEntry()
      >>> entry.msgid = 'translate me !'
      >>> entry.msgstr = 'traduisez moi !'
      >>> print(entry)
      msgid "translate me !"
      msgstr "traduisez moi !"
      <BLANKLINE>
      """

      def __str__(self, wrapwidth=78):
          """
          Render the entry as text.

          The work is delegated to ``_BaseEntry.__str__``, which receives
          *wrapwidth* unchanged.
          """
          rendered = _BaseEntry.__str__(self, wrapwidth)
          return rendered
  955. # }}}
  956. # class _POFileParser {{{
  class _POFileParser(object):
      """
      A finite state machine that parses the po file format line by line.

      Each line is classified into a two-letter symbol; ``process()`` then
      looks up the (symbol, current state) transition, runs its handler and
      moves to the next state. Lines that match no known prefix are ignored.
      """

      # States in which the entry being built already holds a translation:
      # a comment, context or msgid read in one of them starts a new entry.
      _ENTRY_END_STATES = ('MC', 'MS', 'MX')

      # Line prefixes and the symbol they map to. Order matters: the first
      # matching prefix wins.
      _TOKEN_PREFIXES = (
          ('#:', 'OC'),              # occurrences line
          ('msgctxt "', 'CT'),       # msgctxt
          ('msgid "', 'MI'),         # msgid
          ('msgstr "', 'MS'),        # msgstr
          ('"', 'MC'),               # continuation line or some metadata
          ('msgid_plural "', 'MP'),  # msgid plural
          ('msgstr[', 'MX'),         # msgstr plural
          ('#, ', 'FL'),             # flags line
          ('# ', 'TC'),              # translator comment line
          ('#.', 'GC'),              # generated comment line
      )

      def __init__(self, fpath, enc=default_encoding):
          """
          Constructor.

          **Keyword arguments**:
            - *fpath*: string, path to the po file
            - *enc*: string, encoding used to read the file; when the codec
              is unknown (``LookupError``) ``default_encoding`` is used
              instead
          """
          try:
              self.fhandle = codecs.open(fpath, 'rU', enc)
          except LookupError:
              enc = default_encoding
              self.fhandle = codecs.open(fpath, 'rU', enc)
          self.instance = POFile(fpath=fpath, encoding=enc)
          self.transitions = {}
          self.current_entry = POEntry()
          self.current_state = 'ST'
          self.current_token = None
          # two memo flags used in handlers
          self.msgstr_index = 0
          self.entry_obsolete = 0
          # Configure the state machine, by adding transitions.
          # Meaning of the symbols:
          #     * ST: Beginning of the file (start)
          #     * HE: Header
          #     * TC: a translation comment
          #     * GC: a generated comment
          #     * OC: a file/line occurrence
          #     * FL: a flags line
          #     * CT: a message context
          #     * MI: a msgid
          #     * MP: a msgid plural
          #     * MS: a msgstr
          #     * MX: a msgstr plural
          #     * MC: a msgid or msgstr continuation line
          all_states = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'MS', 'MP',
                        'MX', 'MI']
          self.add('TC', ['ST', 'HE'], 'HE')
          self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'],
                   'TC')
          self.add('GC', all_states, 'GC')
          self.add('OC', all_states, 'OC')
          self.add('FL', all_states, 'FL')
          self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'],
                   'CT')
          self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'MS',
                          'MX'], 'MI')
          self.add('MP', ['TC', 'GC', 'MI'], 'MP')
          self.add('MS', ['MI', 'MP', 'TC'], 'MS')
          self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
          self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX'], 'MC')

      def parse(self):
          """
          Run the state machine, parse the file line by line and call
          process() with the current matched symbol.

          Returns the populated POFile instance. An ``IOError`` raised by
          process() on a syntax error propagates to the caller; the file
          handle is closed in every case.
          """
          try:
              for index, line in enumerate(self.fhandle):
                  line = line.strip()
                  if line == '':
                      continue
                  if line[:3] == '#~ ':
                      # obsolete entry: drop the marker, remember the fact
                      line = line[3:]
                      self.entry_obsolete = 1
                  else:
                      self.entry_obsolete = 0
                  self.current_token = line
                  symbol = self._symbol_for(line)
                  if symbol is not None:
                      # enumerate() counts from 0, line numbers from 1
                      self.process(symbol, index + 1)
          finally:
              # close opened file, even when a syntax error was raised
              self.fhandle.close()
          if self.current_entry:
              # since entries are added when another entry is found, we
              # must add the last entry here (only if there are lines)
              self.instance.append(self.current_entry)
          # before returning the instance, check if there's metadata and if
          # so extract it in a dict
          if len(self.instance) > 0 and self.instance[0].msgid == '':
              # metadata found: remove the entry
              firstentry = self.instance.pop(0)
              self.instance.metadata_is_fuzzy = firstentry.flags
              key = None
              for msg in firstentry.msgstr.splitlines():
                  try:
                      key, val = msg.split(':', 1)
                  except ValueError:
                      # no colon on this line: treat it as the continuation
                      # of the previous metadata value, if there is one
                      if key is not None:
                          self.instance.metadata[key] += '\n' + msg.strip()
                  else:
                      self.instance.metadata[key] = val.strip()
          return self.instance

      def _symbol_for(self, line):
          """
          Return the two-letter symbol matching *line*, or None when the
          line matches no known prefix.
          """
          if line == '#':
              # a bare '#' is an empty translator comment
              return 'TC'
          for prefix, symbol in self._TOKEN_PREFIXES:
              if line.startswith(prefix):
                  return symbol
          return None

      def add(self, symbol, states, next_state):
          """
          Add a transition to the state machine.

          Keywords arguments:

          symbol     -- string, the matched token (two chars symbol)
          states     -- list, a list of states (two chars symbols)
          next_state -- the next state the fsm will have after the action
          """
          # the handler depends only on next_state: look it up once
          action = getattr(self, 'handle_%s' % next_state.lower())
          for state in states:
              self.transitions[(symbol, state)] = (action, next_state)

      def process(self, symbol, linenum):
          """
          Process the transition corresponding to the current state and the
          symbol provided.

          Keywords arguments:

          symbol  -- string, the matched token (two chars symbol)
          linenum -- integer, the current line number of the parsed file

          Raises IOError, mentioning *linenum*, when no transition exists
          for (symbol, current state) or when the handler fails.
          """
          try:
              (action, state) = self.transitions[(symbol, self.current_state)]
              # a handler returning a false value keeps the current state
              if action():
                  self.current_state = state
          except Exception:
              raise IOError('Syntax error in po file (line %s)' % linenum)

      # state handlers

      def _store_finished_entry(self):
          """
          If the current state shows that the entry being built already has
          its translation, append it to the file and start a new entry.
          """
          if self.current_state in self._ENTRY_END_STATES:
              self.instance.append(self.current_entry)
              self.current_entry = POEntry()

      def handle_he(self):
          """Handle a header comment."""
          if self.instance.header != '':
              self.instance.header += '\n'
          self.instance.header += self.current_token[2:]
          return True

      def handle_tc(self):
          """Handle a translator comment."""
          self._store_finished_entry()
          if self.current_entry.tcomment != '':
              self.current_entry.tcomment += '\n'
          self.current_entry.tcomment += self.current_token[2:]
          return True

      def handle_gc(self):
          """Handle a generated comment."""
          self._store_finished_entry()
          if self.current_entry.comment != '':
              self.current_entry.comment += '\n'
          self.current_entry.comment += self.current_token[3:]
          return True

      def handle_oc(self):
          """Handle a file:num occurrence."""
          self._store_finished_entry()
          for occurrence in self.current_token[3:].split():
              try:
                  fil, line = occurrence.split(':')
              except ValueError:
                  # no colon, or more than one: keep the token as the file
                  # name, with no line number
                  self.current_entry.occurrences.append((occurrence, ''))
                  continue
              if not line.isdigit():
                  # the part after the colon is not a line number: glue it
                  # back to the file name
                  # NOTE(review): the colon itself is dropped here; confirm
                  # that this is intended
                  fil = fil + line
                  line = ''
              self.current_entry.occurrences.append((fil, line))
          return True

      def handle_fl(self):
          """Handle a flags line."""
          self._store_finished_entry()
          self.current_entry.flags += self.current_token[3:].split(', ')
          return True

      def handle_ct(self):
          """Handle a msgctxt."""
          self._store_finished_entry()
          self.current_entry.msgctxt = unescape(self.current_token[9:-1])
          return True

      def handle_mi(self):
          """Handle a msgid."""
          self._store_finished_entry()
          self.current_entry.obsolete = self.entry_obsolete
          self.current_entry.msgid = unescape(self.current_token[7:-1])
          return True

      def handle_mp(self):
          """Handle a msgid plural."""
          self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
          return True

      def handle_ms(self):
          """Handle a msgstr."""
          self.current_entry.msgstr = unescape(self.current_token[8:-1])
          return True

      def handle_mx(self):
          """
          Handle a msgstr plural.

          The index is read up to the closing bracket instead of assuming a
          single digit, so ``msgstr[10] "..."`` is parsed correctly too.
          """
          token = self.current_token
          # the token starts with 'msgstr[': the index begins at position 7
          closing = token.index(']', 7)
          index = token[7:closing]
          opening_quote = token.index('"', closing)
          value = token[opening_quote + 1:-1]
          self.current_entry.msgstr_plural[index] = unescape(value)
          self.msgstr_index = index
          return True

      def handle_mc(self):
          """Handle a msgid or msgstr continuation line."""
          text = unescape(self.current_token[1:-1])
          entry = self.current_entry
          if self.current_state == 'CT':
              entry.msgctxt += text
          elif self.current_state == 'MI':
              entry.msgid += text
          elif self.current_state == 'MP':
              entry.msgid_plural += text
          elif self.current_state == 'MS':
              entry.msgstr += text
          elif self.current_state == 'MX':
              msgstr = entry.msgstr_plural[self.msgstr_index] + text
              entry.msgstr_plural[self.msgstr_index] = msgstr
          # don't change the current state
          return False
  1200. # }}}
  1201. # class _MOFileParser {{{
  class _MOFileParser(object):
      """
      A class to parse binary mo files.
      """
      # Value of the magic number, read as a little-endian integer, for a
      # big-endian and a little-endian file respectively.
      BIG_ENDIAN = 0xde120495
      LITTLE_ENDIAN = 0x950412de

      def __init__(self, fpath, enc=default_encoding):
          """
          _MOFileParser constructor.

          **Keyword arguments**:
            - *fpath*: string, path to the mo file
            - *enc*: string, encoding recorded on the resulting MOFile
          """
          self.fhandle = open(fpath, 'rb')
          self.instance = MOFile(fpath=fpath, encoding=enc)

      def parse_magicnumber(self):
          """
          Placeholder kept for interface compatibility: it does nothing.
          The magic number is read and validated by parse().
          """

      def parse(self):
          """
          Build the instance with the file handle provided in the
          constructor.

          Returns the populated MOFile instance. Raises IOError when the
          magic number is not a valid one. The file handle is closed in
          every case.
          """
          try:
              magic_number = self._readbinary('<I', 4)
              if magic_number == self.LITTLE_ENDIAN:
                  ii = '<II'
              elif magic_number == self.BIG_ENDIAN:
                  ii = '>II'
              else:
                  raise IOError('Invalid mo file, magic number is incorrect !')
              self.instance.magic_number = magic_number
              # parse the version number and the number of strings
              self.instance.version, numofstrings = self._readbinary(ii, 8)
              # offsets of the original strings table and of the
              # translation strings table
              msgids_table_offset, msgstrs_table_offset = \
                  self._readbinary(ii, 8)
              # move to the msgid table and read length and offset of msgids
              self.fhandle.seek(msgids_table_offset)
              msgids_index = [self._readbinary(ii, 8)
                              for _ in range(numofstrings)]
              # move to the msgstr table and read length and offset of
              # msgstrs
              self.fhandle.seek(msgstrs_table_offset)
              msgstrs_index = [self._readbinary(ii, 8)
                               for _ in range(numofstrings)]
              # build entries
              for i in range(numofstrings):
                  msgid_length, msgid_offset = msgids_index[i]
                  self.fhandle.seek(msgid_offset)
                  msgid = self.fhandle.read(msgid_length)
                  msgstr_length, msgstr_offset = msgstrs_index[i]
                  self.fhandle.seek(msgstr_offset)
                  msgstr = self.fhandle.read(msgstr_length)
                  if i == 0:
                      # the first string pair is taken to be the metadata
                      self.instance.metadata = self._parse_metadata(msgstr)
                      continue
                  entry = MOEntry(msgid=msgid, msgstr=msgstr)
                  self.instance.append(entry)
          finally:
              # close opened file, even when the file turned out invalid
              self.fhandle.close()
          return self.instance

      def _parse_metadata(self, raw):
          """
          Turn the raw metadata string into a dict: one "key: value" pair
          per line; a line without a colon yields an empty value.
          """
          metadata = {}
          for line in raw.split('\n'):
              tokens = line.split(':', 1)
              if tokens[0] != '':
                  try:
                      metadata[tokens[0]] = tokens[1].strip()
                  except IndexError:
                      metadata[tokens[0]] = ''
          return metadata

      def _readbinary(self, fmt, numbytes):
          """
          Private method that unpack n bytes of data using format <fmt>.
          It returns a tuple or a mixed value if the tuple length is 1.
          """
          data = self.fhandle.read(numbytes)
          tup = struct.unpack(fmt, data)
          if len(tup) == 1:
              return tup[0]
          return tup
  1275. # }}}
  1276. # __main__ {{{
  if __name__ == '__main__':
      """
      **Main function**::
        - to **test** the module just run: *python polib.py [-v]*
        - to **profile** the module: *python polib.py -p <some_pofile.po>*
      """
      import sys
      if len(sys.argv) > 2 and sys.argv[1] == '-p':
          def test(f):
              """Load the po or mo file *f* and render it as text."""
              if f.endswith('po'):
                  p = pofile(f)
              else:
                  p = mofile(f)
              # rendering is part of what gets profiled; the text itself
              # is not needed
              str(p)
          import profile
          # %r produces a properly quoted and escaped string literal, so a
          # path containing quotes or backslashes cannot break (or inject
          # code into) the profiled statement
          profile.run('test(%r)' % (sys.argv[2],))
      else:
          import doctest
          doctest.testmod()
  1296. # }}}