TOKENIZE
功能说明
分词函数。如果想检查分词的实际效果,或者需要对一段文本进行分词,可以使用 TOKENIZE 函数。
参数说明
- 第一个参数:需要分词的文本,string 类型。
- 第二个参数:map<string, string> 类型的分词选项,其中 'analyzer' 键用于指定分词器,可选值包括 keyword、chinese、unicode、english。
返回结果
返回值为array<string>
案例
--使用keyword分词
SELECT TOKENIZE('Lakehouse的倒排索引',map('analyzer', 'keyword')) as toke;
+--------------------+
| toke |
+--------------------+
| ["Lakehouse的倒排索引"] |
+--------------------+
--使用chinese分词
SELECT TOKENIZE('Lakehouse的倒排索引',map('analyzer', 'chinese')) as toke;
+--------------------------------+
| toke |
+--------------------------------+
| ["lakehouse","的","倒排","索引"] |
+--------------------------------+
--使用unicode分词
SELECT TOKENIZE('Lakehouse的倒排索引',map('analyzer', 'unicode')) as toke;
+--------------------------------+
| toke |
+--------------------------------+
| ["lakehouse","的","倒","排","索引"] |
+--------------------------------+
--使用english分词
SELECT TOKENIZE('Lakehouse inverted index',map('analyzer', 'english')) as toke;
+----------------------------------+
| toke |
+----------------------------------+
| ["lakehouse","inverted","index"] |
+----------------------------------+