#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/12/12 0012 上午 10:34
# @Author : XXX
# @Function : 情感分析
# @File : sentimentAanlaye.py
# @IDE :python 3.7
import re
import os
import jieba
# Add the single character '超' to jieba's dictionary, then load the extra
# user dictionary from the local jieba installation directory.
jieba.add_word('超')
jieba.load_userdict(r'D:\python安装\Lib\site-packages\jieba\dict2.txt')
import time
from numpy import *
# Wall-clock start time of the run.
t_start = time.time()
# All dictionary/corpus files below are opened with relative paths, so switch
# the working directory to the folder that holds them.
os.chdir(r'E:\guthub\gensim_w2v\知网Hownet情感词典')
# --------------------- sentiment analysis ------------------
# Punctuation characters (ASCII and CJK).
# NOTE(review): the runs of '?' look like characters that were lost in an
# encoding round-trip, and the escaped double quote was restored from a
# garbled delimiter — confirm against the intended punctuation set.
add_punc = "!??\\ # $%&'()*+,-/:;<=>@[\\]^_`{|}~?????、〃》「」『』【】〔〕〖〗\"???〝〞????–—‘’?“”??…?﹏."
def readTestText(filepath):
    """Read the test corpus.

    Args:
        filepath: Path of a UTF-8 encoded text file.

    Returns:
        A list with one entry per line of the file, each stripped of
        leading and trailing whitespace (blank lines become '').
    """
    # The context manager closes the handle even if decoding fails.
    with open(filepath, 'r', encoding='utf_8') as corpus_file:
        testText = [line.strip() for line in corpus_file]
    return testText
def stopWordList(filepath):
    """Read the stop-word table.

    Args:
        filepath: Path of a UTF-8 encoded text file, one stop word per line.

    Returns:
        A list with one whitespace-stripped entry per line of the file.
    """
    # The context manager closes the handle even if decoding fails.
    with open(filepath, 'r', encoding='utf_8') as stop_file:
        stopWords = [line.strip() for line in stop_file]
    return stopWords
def positivedict(filepath):
    """Read the positive (commendatory) word table.

    Args:
        filepath: Path of a UTF-8 encoded text file, one word per line.

    Returns:
        A list with one whitespace-stripped entry per line of the file.
    """
    # The context manager closes the handle even if decoding fails.
    with open(filepath, 'r', encoding='utf_8') as dict_file:
        positiveDict = [line.strip() for line in dict_file]
    return positiveDict
def negetivedict(filepath):
    """Read the negative (derogatory) word table.

    Args:
        filepath: Path of a UTF-8 encoded text file, one word per line.

    Returns:
        A list with one whitespace-stripped entry per line of the file.
    """
    # The context manager closes the handle even if decoding fails.
    with open(filepath, 'r', encoding='utf_8') as dict_file:
        negetiveDict = [line.strip() for line in dict_file]
    return negetiveDict
def leveldict(filepath, sep=','):
    """Read the degree-adverb table.

    Each non-blank line holds a word followed by a numeric weight, separated
    by ``sep``.

    NOTE(review): the separator literal was lost in the source this was
    recovered from; ',' is assumed — pass ``sep`` explicitly if the data
    files use something else.

    Args:
        filepath: Path of a UTF-8 encoded text file.
        sep: Field separator between the word and its weight.

    Returns:
        A tuple ``(levelDict, levelList)``: the words and, at matching
        positions, their weights as floats.

    Raises:
        IndexError: A non-blank line does not contain ``sep``.
        ValueError: The second field of a line is not a number.
    """
    levelList = []
    levelDict = []
    # The context manager closes the handle even if parsing fails.
    with open(filepath, 'r', encoding='utf_8') as dict_file:
        for raw_line in dict_file:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no entry.
                continue
            element = line.split(sep)
            levelList.append(float(element[1]))
            levelDict.append(element[0])
    return levelDict, levelList
def denydict(filepath):
    """Read the negation-word table.

    Args:
        filepath: Path of a UTF-8 encoded text file, one word per line.

    Returns:
        A list with one whitespace-stripped entry per line of the file.
    """
    # The context manager closes the handle even if decoding fails.
    with open(filepath, 'r', encoding='utf_8') as dict_file:
        denyDict = [line.strip() for line in dict_file]
    return denyDict
def themedict(filepath, sep=','):
    """Read the theme-word table.

    Each non-blank line holds a word followed by a numeric value, separated
    by ``sep``.

    NOTE(review): the separator literal was lost in the source this was
    recovered from; ',' is assumed — pass ``sep`` explicitly if the data
    files use something else.

    Args:
        filepath: Path of a UTF-8 encoded text file.
        sep: Field separator between the word and its value.

    Returns:
        A tuple ``(themeDict, themeList)``: the words and, at matching
        positions, their values as floats.

    Raises:
        IndexError: A non-blank line does not contain ``sep``.
        ValueError: The second field of a line is not a number.
    """
    themeList = []
    themeDict = []
    # The context manager closes the handle even if parsing fails.
    with open(filepath, 'r', encoding='utf_8') as dict_file:
        for raw_line in dict_file:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no entry.
                continue
            element = line.split(sep)
            themeList.append(float(element[1]))
            themeDict.append(element[0])
    return themeDict, themeList
'''
----------------------------------
Disabled: reader for the relation-word table (kept for reference only).

def relatedict(filepath):
    relateList = []
    relateDict = []
    Dict = [line.strip() for line in open(filepath, 'r', encoding='utf_8').readlines()]
    for line in Dict:
        element = line.split(',')  # separator assumed to be ',' - confirm
        relateList.append(float(element[1]))
        relateDict.append(element[0])
    return relateDict, relateList  # return the words and their labels
'''
# Load the test corpus and every lexicon; the paths are relative, so they are
# resolved against the current working directory.
testText = readTestText('mixcomment.txt')
positiveDict = positivedict('positiveDict.txt')
negetiveDict = negetivedict('negetiveDict.txt')
# leveldict/themedict each return a (words, values) pair.
levelDict, levelList = leveldict('levelDict.txt')
denyDict = denydict('denyDict.txt')
stopWords = stopWordList('stop_Words.txt')
themeDict, themeList = themedict('themeDict.txt')
# print(themeList[0:5], themeDict[0:5])
# relateDict, relateList = relatedict('relateDict.txt')
print('-------所有字典读取完毕------')
lengthOfText = len(testText)
# Output file for the labels. It is deliberately left open here; presumably
# the processing loop that follows writes to it and closes it — verify.
label = open('label.txt', 'w', encoding="utf_8")
for i in range(lengthOfText):#分句
content = testText[i].strip()
regex = “,|。|!|?|、|\\s+“
conte
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-12-21 10:51 Hownet+NTUSD+情感分析python代码\
文件 8371 2018-12-21 10:50 Hownet+NTUSD+情感分析python代码\annalysi.py
文件 81726 2018-12-18 15:38 Hownet+NTUSD+情感分析python代码\archivetempNTUSD_negative_simplified.txt
文件 27042 2018-12-18 15:37 Hownet+NTUSD+情感分析python代码\archivetempNTUSD_positive_simplified.txt
文件 2114 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\程度级别词语(英文).txt
文件 1478 2011-11-20 14:51 Hownet+NTUSD+情感分析python代码\程度级别词语(中文).txt
文件 57704 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\负面评价词语(英文).txt
文件 26101 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\负面评价词语(中文).txt
文件 18511 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\负面情感词语(英文).txt
文件 74906 2018-12-18 15:40 Hownet+NTUSD+情感分析python代码\负面情感词语(中文).txt
文件 61667 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\正面评价词语(英文).txt
文件 30409 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\正面评价词语(中文).txt
文件 14190 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\正面情感词语(英文).txt
文件 27670 2018-12-18 15:39 Hownet+NTUSD+情感分析python代码\正面情感词语(中文).txt
文件 451 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\主张词语(英文).txt
文件 289 2007-10-21 16:16 Hownet+NTUSD+情感分析python代码\主张词语(中文).txt