python文字探勘 分词(Tokenization)

import nltk
# 只需要运行一次
try:
    nltk.data.find('tokenizers/punkt')
except nltk.downloader.DownloadError:
    nltk.download('punkt')
import nltk
from nltk.tokenize import word_tokenize

# 示例文本
text = "NLTK is a powerful library for NLP. It's often used for academic research."

# 分词
nltk_tokens = word_tokenize(text)

print("--- NLTK 分词结果 ---")
print(nltk_tokens)
# 预期输出: ['NLTK', 'is', 'a', 'powerful', 'library', 'for', 'NLP', '.', 'It', "'s", 'often', 'used', 'for', 'academic', 'research', '.']