Skip to content

Commit ec37ac0

Browse files
committed
Export every form from lexicon
1 parent 8c05c78 commit ec37ac0

File tree

1 file changed

+14
-13
lines changed

1 file changed

+14
-13
lines changed

speech_kaldi_adapt.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -113,25 +113,26 @@
113113
dictf.write('!SIL SIL\n')
114114

115115
for token in sorted(lex):
116+
multi = lex.get_multi(token)
117+
for form in multi:
118+
ipa = multi[form]['ipa']
119+
xsr = ipa2xsampa (token, ipa, spaces=True)
116120

117-
ipa = lex[token]['ipa']
118-
xsr = ipa2xsampa (token, ipa, spaces=True)
121+
xs = xsr.replace('-','').replace('\' ', '\'').replace('  ', ' ').replace('#', 'nC')
119122

120-
xs = xsr.replace('-','').replace('\' ', '\'').replace('  ', ' ').replace('#', 'nC')
123+
dictf.write((u'%s %s\n' % (token, xs)).encode('utf8'))
121124

122-
dictf.write((u'%s %s\n' % (token, xs)).encode('utf8'))
125+
for p in xs.split(' '):
123126

124-
for p in xs.split(' '):
127+
if len(p)<1:
128+
logging.error ( u"****ERROR: empty phoneme in : '%s' ('%s', ipa: '%s', token: '%s')" % (xs, xsr, ipa, token) )
125129

126-
if len(p)<1:
127-
logging.error ( u"****ERROR: empty phoneme in : '%s' ('%s', ipa: '%s', token: '%s')" % (xs, xsr, ipa, token) )
130+
pws = p[1:] if p[0] == '\'' else p
128131

129-
pws = p[1:] if p[0] == '\'' else p
130-
131-
if not pws in ps:
132-
ps[pws] = set([p])
133-
else:
134-
ps[pws].add(p)
132+
if not pws in ps:
133+
ps[pws] = set([p])
134+
else:
135+
ps[pws].add(p)
135136

136137
logging.info ( "%s written." % dictfn2 )
137138

0 commit comments

Comments
 (0)