|
113 | 113 | dictf.write('!SIL SIL\n')
|
114 | 114 |
|
115 | 115 | for token in sorted(lex):
|
| 116 | + multi = lex.get_multi(token) |
| 117 | + for form in multi: |
| 118 | + ipa = multi[form]['ipa'] |
| 119 | + xsr = ipa2xsampa (token, ipa, spaces=True) |
116 | 120 |
|
117 |
| - ipa = lex[token]['ipa'] |
118 |
| - xsr = ipa2xsampa (token, ipa, spaces=True) |
| 121 | + xs = xsr.replace('-','').replace('\' ', '\'').replace(' ', ' ').replace('#', 'nC') |
119 | 122 |
|
120 |
| - xs = xsr.replace('-','').replace('\' ', '\'').replace(' ', ' ').replace('#', 'nC') |
| 123 | + dictf.write((u'%s %s\n' % (token, xs)).encode('utf8')) |
121 | 124 |
|
122 |
| - dictf.write((u'%s %s\n' % (token, xs)).encode('utf8')) |
| 125 | + for p in xs.split(' '): |
123 | 126 |
|
124 |
| - for p in xs.split(' '): |
| 127 | + if len(p)<1: |
| 128 | + logging.error ( u"****ERROR: empty phoneme in : '%s' ('%s', ipa: '%s', token: '%s')" % (xs, xsr, ipa, token) ) |
125 | 129 |
|
126 |
| - if len(p)<1: |
127 |
| - logging.error ( u"****ERROR: empty phoneme in : '%s' ('%s', ipa: '%s', token: '%s')" % (xs, xsr, ipa, token) ) |
| 130 | + pws = p[1:] if p[0] == '\'' else p |
128 | 131 |
|
129 |
| - pws = p[1:] if p[0] == '\'' else p |
130 |
| - |
131 |
| - if not pws in ps: |
132 |
| - ps[pws] = set([p]) |
133 |
| - else: |
134 |
| - ps[pws].add(p) |
| 132 | + if not pws in ps: |
| 133 | + ps[pws] = set([p]) |
| 134 | + else: |
| 135 | + ps[pws].add(p) |
135 | 136 |
|
136 | 137 | logging.info ( "%s written." % dictfn2 )
|
137 | 138 |
|
|
0 commit comments