去除中文
# Strip Chinese characters from a sample string.
import re

# Sample message mixing Chinese text, digits, punctuation and English words.
p1 = (
    '帮会建了徽信群 没在群里的加下徽信:[30109552300],'
    '晚上群里有活动通知大家,(抢资源),争地盘,谢谢配合。i love you '
)
# Delete every character in U+4E00..U+9FA5; spaces, digits, punctuation
# (including the ideographic full stop) and Latin letters are left in place.
linee = re.compile('[\u4e00-\u9fa5]').sub('', p1)
print(linee)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
:[30109552300],,(),,。i love you
- 1
去除标点
# Character class of punctuation to strip: ASCII punctuation, the right single
# quotation mark (’) and the ideographic full stop (。).
# Written as a raw string with explicit escapes so the set is exactly what it
# looks like:
#   * '\-' keeps the hyphen a literal instead of forming a ',-/' range
#     ('.' and '/' are therefore listed explicitly);
#   * '\[' and '\]' are literal brackets;
#   * '\\' puts a literal backslash in the set, so backslashes are stripped too.
simple_punctuation = r'[’!"#$%&\'()*+,\-./:;<=>?@\[\\\]^_`{|}~,。,]'
# `linee` is the text left after the Chinese characters were removed.
line = re.sub(simple_punctuation, '', linee)
- 1
- 2
去除数字
# Replace every ASCII digit in `line` with a single space (digits are blanked
# out, not deleted, so the string keeps its length).
# NOTE: the result is not assigned to anything; it is only visible when an
# interactive session echoes the value of the expression.
re.compile("[0-9]").sub(" ", line)
- 1
'            i love you '
- 1