This commit is contained in:
elem_work 2017-11-28 00:52:47 -06:00
parent 5aef60aa91
commit 64698013ef
2 changed files with 272 additions and 17 deletions

17
main.py
View File

@ -1,19 +1,8 @@
from retEmoDict import emoDic
from newsTrain import classifyNews
from clust import clustering
from newsTrain import trainVect, flagger
temp = clustering()
sert = trainVect()
emoDict = emoDic()
rest = []
for i in temp:
rest.append(classifyNews(i, emoDict))
for i in rest:
for i in sert:
print(i)

View File

@ -1,6 +1,10 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from stopWords import stopWrdList
from retEmoDict import emoDic
from clust import clustering
import operator
def classifyNews(word_array, dict):
def trainPre(word_array, dict):
default = 'NA'
alegria = []
@ -72,6 +76,268 @@ def classifyNews(word_array, dict):
if len(flag) == 0:
flag = ['NEU']
return [('Positive:', pos), ('Negative:', neg), flag, vect]
return [pos, neg, flag, vect]
def corporizer():
emoDict = emoDic()
clust = clustering()
temp = []
for i in clust:
temp.append(trainPre(i, emoDict))
tempy = []
for vect in temp:
tempy.append(' '.join(vect[3]))
return tempy
def flagger():
emoDict = emoDic()
clust = clustering()
temp = []
for i in clust:
temp.append(trainPre(i, emoDict))
flag = []
for j in temp:
#print(j[2])
if j[2] == (['CONTRA', 'NEU', 'PRI'] or ['NEU', 'CONTRA', 'PRI'] or ['NEU', 'PRI', 'CONTRA'] or
['PRI', 'NEU', 'CONTRA'] or ['CONTRA', 'PRI', 'NEU'] or ['PRI', 'CONTRA', 'NEU']):
flag.append(1)
#else:
# flag.append(0)
if j[2] == (['CONTRA', 'PRI'] or ['PRI', 'CONTRA']):
flag.append(1)
#else:
# flag.append(6)
if j[2] == ['NEU']:
flag.append(1)
#else:
# flag.append(7)
if j[2] == (['PRI'] or ['NEU', 'PRI'] or ['PRI', 'NEU']):
flag.append(2)
#else:
# flag.append(8)
if j[2] == (['CONTRA'] or ['NEU', 'CONTRA'] or ['CONTRA', 'NEU']):
flag.append(3)
#else:
# flag.append(9)
index = []
for i in temp:
if i[0] == i[1]:
index.append(1)
if i[0] > i[1]:
index.append(2)
if i[0] < i[1]:
index.append(3)
lenFlag = len(flag)
lenInde = len(index)
if lenFlag < lenInde:
for i in range(lenInde - lenFlag):
flag.append(1)
return (index, flag)
def operate_on_Narray(A, B, function):
try:
return [operate_on_Narray(a, b, function) for a, b in zip(A, B)]
except TypeError as e:
# Not iterable
return function(A, B)
def trainVect():
flag = flagger()
corpus = corporizer()
stop_words = stopWrdList()
vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words)
X = vectorizer.fit_transform(corpus)
vector = X.toarray()
long = len(flag[0])
part_neu_ind = []
part_neg_ind = []
part_pos_ind = []
cont_neu_ind = []
cont_neg_ind = []
cont_pos_ind = []
neut_neu_ind = []
neut_neg_ind = []
neut_pos_ind = []
# flag 0 has emotion info, flag 1 has political party info
# 1 is neutral emo ; 2 is positive emo ; 3 is negative emo
# 1 is neutral ; 2 is pol; 3 is opposition
for s in range(long):
if flag[0][s] == 1 and flag[1][s] == 1:
neut_neu_ind.append(s)
if flag[0][s] == 1 and flag[1][s] == 2:
part_neu_ind.append(s)
if flag[0][s] == 1 and flag[1][s] == 3:
cont_neu_ind.append(s)
if flag[0][s] == 2 and flag[1][s] == 2:
part_pos_ind.append(s)
if flag[0][s] == 2 and flag[1][s] == 3:
cont_pos_ind.append(s)
if flag[0][s] == 2 and flag[1][s] == 1:
neut_pos_ind.append(s)
if flag[0][s] == 3 and flag[1][s] == 1:
neut_neg_ind.append(s)
if flag[0][s] == 3 and flag[1][s] == 2:
part_neg_ind.append(s)
if flag[0][s] == 3 and flag[1][s] == 3:
cont_neg_ind.append(s)
part_neu_vect = [vector[x] for x in part_neu_ind]
part_neg_vect = [vector[x] for x in part_neg_ind]
part_pos_vect = [vector[x] for x in part_pos_ind]
cont_neu_vect = [vector[x] for x in cont_neu_ind]
cont_neg_vect = [vector[x] for x in cont_neg_ind]
cont_pos_vect = [vector[x] for x in cont_pos_ind]
neut_neu_vect = [vector[x] for x in neut_neu_ind]
neut_neg_vect = [vector[x] for x in neut_neg_ind]
neut_pos_vect = [vector[x] for x in neut_pos_ind]
len1 = len(part_neu_vect)
if len1 != 0:
for a in range(len1):
tmp = part_neu_vect[0]
tmp = operate_on_Narray(part_neu_vect[0], tmp[a+1], lambda x, y: x + y)
tmp = operate_on_Narray(part_neu_vect[0], tmp[a+1], lambda x, y: x / len1)
part_neu_vect = list(tmp)
else:
part_neu_vect = []
len1 = len(part_neg_vect)
if len1 != 0:
for a in range(len1):
tmp = part_neg_vect[0]
tmp = operate_on_Narray(part_neg_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(part_neg_vect[0], tmp[a + 1], lambda x, y: x / len1)
part_neg_vect = list(tmp)
else:
part_neg_vect = []
len1 = len(part_pos_vect)
if len1 != 0:
for a in range(len1):
tmp = part_pos_vect[0]
tmp = operate_on_Narray(part_pos_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(part_pos_vect[0], tmp[a + 1], lambda x, y: x / len1)
part_pos_vect = list(tmp)
else:
part_pos_vect = []
len1 = len(cont_neu_vect)
if len1 != 0:
for a in range(len1):
tmp = cont_neu_vect[0]
tmp = operate_on_Narray(cont_neu_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(cont_neu_vect[0], tmp[a + 1], lambda x, y: x / len1)
cont_neu_vect = list(tmp)
else:
cont_neu_vect = []
len1 = len(cont_neg_vect)
if len1 != 0:
for a in range(len1):
tmp = cont_neg_vect[0]
tmp = operate_on_Narray(cont_neg_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(cont_neg_vect[0], tmp[a + 1], lambda x, y: x / len1)
cont_neg_vect = list(tmp)
else:
cont_neg_vect = []
len1 = len(cont_pos_vect)
if len1 != 0:
for a in range(len1):
tmp = cont_pos_vect[0]
tmp = operate_on_Narray(cont_pos_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(cont_pos_vect[0], tmp[a + 1], lambda x, y: x / len1)
cont_pos_vect = list(tmp)
else:
cont_pos_vect = []
len1 = len(neut_neg_vect)
if len1 != 0:
for a in range(len1):
tmp = neut_neg_vect[0]
tmp = operate_on_Narray(neut_neg_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(neut_neg_vect[0], tmp[a + 1], lambda x, y: x / len1)
neut_neg_vect = list(tmp)
else:
neut_neg_vect = []
len1 = len(neut_pos_vect)
if len1 != 0:
for a in range(len1):
tmp = neut_pos_vect[0]
tmp = operate_on_Narray(neut_pos_vect[0], tmp[a + 1], lambda x, y: x + y)
tmp = operate_on_Narray(neut_pos_vect[0], tmp[a + 1], lambda x, y: x / len1)
neut_pos_vect = list(tmp)
else:
neut_pos_vect = []
return [part_neu_vect, part_neg_vect, part_pos_vect, cont_neu_vect, cont_neg_vect, cont_pos_vect, neut_neu_vect, neut_neg_vect, neut_pos_vect]