def word2id(corpus, word_to_id, max_length):
    """Convert space-separated lines into fixed-length lists of word ids.

    Args:
        corpus: Iterable of strings; each string is split on single spaces
            into tokens.
        word_to_id: Mapping from token to integer id. Every token of a line
            that is kept must be present, because tokens are looked up by
            indexing.
        max_length: Length of every returned id list.

    Returns:
        A list with one entry per kept line. Each entry is a list of ids,
        right-padded with 0 up to ``max_length``.

    Raises:
        KeyError: If ``word_to_id`` is a dict and a token of a line that is
            not skipped is missing from it.
    """
    result = []
    for line in corpus:
        if line == '':  # skip empty lines
            continue
        if '(' in line or '―' in line:  # skip lines containing a parenthesis or "―"
            continue
        tmp = [word_to_id[word] for word in line.split(' ')]
        if len(tmp) > max_length:  # drop lines with more tokens than max_length
            continue
        # Pad with id 0 so that every row has exactly max_length entries.
        tmp += [0] * (max_length - len(tmp))
        result.append(tmp)
    return result



def id2word(id_data, id_to_word):
    """Convert rows of word ids back into newline-terminated text.

    Args:
        id_data: Iterable of rows, each an iterable of integer ids.
        id_to_word: Mapping from id to its string; indexed for every id in
            a row that is not 0.

    Returns:
        A single string. For each row, the strings of its non-zero ids are
        concatenated without a separator and followed by a newline. Ids
        equal to 0 are skipped. An empty ``id_data`` yields ``''``.
    """
    return ''.join(
        ''.join(id_to_word[word_id] for word_id in line if word_id != 0) + '\n'
        for line in id_data
    )