|
|
# -*- coding: utf-8 -*-
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize('text, text_seq_length, bpe_dropout', [
    ('hello, how are you?', 128, 0.1),
    ('hello, how are you?', 128, 0.5),
    ('hello, how are you?', 128, 1.0),
    ('hello ... how are you ?', 256, 1.0),
    ('a person standing at a table with bottles of win', 64, 0.5),
    ('привет как дела???', 76, 0.0),
    ('клип на русском языке :)', 76, 0.1),
])
def test_encode_decode_text_yttm(yttm_tokenizer, text, text_seq_length, bpe_dropout):
    """Check that encoding a text and decoding the result returns the same text.

    The parametrized cases cover English and Russian inputs, sequence
    lengths from 64 to 256 and BPE dropout values from 0.0 to 1.0.

    Args:
        yttm_tokenizer: Tokenizer under test; it is not part of the
            parametrization, so presumably it is supplied by a pytest
            fixture defined outside this file (e.g. conftest.py) -- confirm.
        text: Input string to round-trip.
        text_seq_length: Forwarded to ``encode_text`` as ``text_seq_length``.
        bpe_dropout: Forwarded to ``encode_text`` as ``bpe_dropout``.
    """
    tokens = yttm_tokenizer.encode_text(
        text,
        text_seq_length=text_seq_length,
        bpe_dropout=bpe_dropout,
    )
    decoded_text = yttm_tokenizer.decode_text(tokens)
    # The round trip must reproduce the input exactly for every case above,
    # including those with non-zero dropout.
    assert text == decoded_text
|