Compare commits
9 Commits
fedora25_w
...
master
Author | SHA1 | Date |
---|---|---|
Eddie | f03ade52aa | |
Eddie | 10eecbf0c8 | |
elem_work | 64698013ef | |
elem_work | 5aef60aa91 | |
Eddie Cueto-Mendoza | 71715a021c | |
Eddie Cueto-Mendoza | 2c81e015ca | |
Eddie Cueto-Mendoza | a56b8578b3 | |
Eddie Cueto-Mendoza | db242c45eb | |
Eddie Cueto-Mendoza | 5ef1f659b6 |
|
@ -0,0 +1,6 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
|
@ -0,0 +1,80 @@
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from stopWords import stopWrdList
|
||||||
|
|
||||||
|
def getTrnVect():
    """Load the trained class vectors stored in ``trn_vect.vec``.

    The file is read from the current working directory. Every non-blank
    line is parsed as one Python list literal (``[]`` is allowed), which is
    the format produced by writing ``str(vector)`` per line.

    Returns:
        list: one parsed list per non-blank line, in file order.

    Raises:
        OSError: if ``trn_vect.vec`` cannot be opened.
        ValueError, SyntaxError: if a line is not a valid Python literal.
    """
    # ast.literal_eval only evaluates literals, so nothing in the file can
    # execute code (unlike yaml.load without an explicit safe Loader), and it
    # parses exponent-form floats such as 1e-05 as numbers.
    import ast

    # The context manager closes the file even if a line fails to parse.
    with open('trn_vect.vec', 'r') as vec_file:
        # Skip blank lines (e.g. the one after the final newline) instead of
        # blindly dropping the last element.
        return [ast.literal_eval(line) for line in vec_file if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def classify_news(document):
    """Classify one news file and print the label of the closest class.

    The text is turned into a TF-IDF vector (at most 100 features), compared
    by cosine similarity with every trained vector returned by
    ``getTrnVect()``, and the message of the most similar class is printed.

    Args:
        document: path of the text file to classify.

    Returns:
        None. The result is written to standard output.

    Raises:
        OSError: if ``document`` cannot be opened.
        ValueError: if ``getTrnVect()`` returns no vectors.
    """
    from similarityMeasures import cos_sim

    # Read the article; the context manager guarantees the handle is closed.
    with open(document, 'r') as news_file:
        news = news_file.read()

    stop_words = stopWrdList()
    vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, max_features=100)
    vector = list(vectorizer.fit_transform([news]).toarray()[0])

    trained_vectors = getTrnVect()

    # Pad with zeros up to the longest trained vector. Using the longest one
    # (rather than the first) still works when the first class is empty.
    target_len = max(len(trained) for trained in trained_vectors)
    vector.extend([0] * (target_len - len(vector)))

    sim_vect = [cos_sim(vector, trained) for trained in trained_vectors]

    # On ties the LAST class with the maximal similarity wins.
    best = max(sim_vect)
    winner = len(sim_vect) - 1 - sim_vect[::-1].index(best)

    # Messages are listed in the same order as the trained vectors.
    results = [
        'This note has neutral emotions and it is related with the party',
        'This note has negative emotions and it is related with the party',
        'This note has positive emotions and it is related with the party',
        'This note has neutral emotions and it is related with the opposition',
        'This note has negative emotions and it is related with the opposition',
        'This note has positive emotions and it is related with the opposition',
        'This note has neutral emotions and it is not particularly related to a political party',
        'This note has negative emotions and it is not particularly related to a political party',
        'This note has positive emotions and it is not particularly related to a political party',
    ]

    print(results[winner])
|
|
@ -1,45 +1,49 @@
|
||||||
from infBack import get_vect as gv
|
from infBack import get_vect as gv
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from stopWords import stopWrdList
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def stopWrdList():
|
def clustering():
|
||||||
sw = open('stop.words')
|
|
||||||
prue = []
|
|
||||||
prue.append(sw.readlines())
|
|
||||||
return [l.strip('\n\r') for l in prue[0]]
|
|
||||||
|
|
||||||
|
# These are the cue words that mark a news item as relevant
|
||||||
|
voc = ["ine", "pri", "pan", "prd", "pt", "pvem", "verde", "movimiento", "ciudadano", "panal", "alianza", "morena", "partido", "encuentro", "social", "electoral"]
|
||||||
|
|
||||||
voc = ["ine", "pri", "pan", "prd", "pt", "pvem", "verde", "movimiento", "ciudadano", "panal", "alianza", "morena", "partido", "encuentro", "social", "electoral"]
|
stop_words = stopWrdList()
|
||||||
|
|
||||||
stop_words = stopWrdList()
|
dataVect = gv()
|
||||||
|
|
||||||
dataVect = gv()
|
dataVect = np.array(dataVect)
|
||||||
|
|
||||||
dataVect = np.array(dataVect)
|
corpus = dataVect[:, 2]
|
||||||
|
|
||||||
corpus = dataVect[:, 2]
|
vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, vocabulary=voc)
|
||||||
|
|
||||||
vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, vocabulary=voc)
|
X = vectorizer.fit_transform(corpus)
|
||||||
|
|
||||||
X = vectorizer.fit_transform(corpus)
|
del dataVect, stop_words, vectorizer # , corpus
|
||||||
|
|
||||||
del dataVect, stop_words, vectorizer # , corpus
|
J = X.toarray()
|
||||||
|
|
||||||
J = X.toarray()
|
# The indexes are extracted to obtain only the relevant news from the general corpus
|
||||||
|
|
||||||
# print(J)
|
index = []
|
||||||
|
|
||||||
index = []
|
for x in range(0, len(J)):
|
||||||
|
if sum(J[x]) != 0:
|
||||||
|
index.append(x)
|
||||||
|
|
||||||
for x in range(0, len(J)):
|
index = tuple(index)
|
||||||
if sum(J[x]) != 0:
|
|
||||||
index.append(x)
|
|
||||||
|
|
||||||
index = tuple(index)
|
electCorp = [corpus[x] for x in index]
|
||||||
|
|
||||||
electCorp = [corpus[x] for x in index]
|
del corpus
|
||||||
|
|
||||||
del corpus
|
# This section of the code processes the political party news in order to give an emotional classification
|
||||||
|
|
||||||
print(electCorp)
|
temp = []
|
||||||
|
|
||||||
|
for i in electCorp:
|
||||||
|
temp.append(i.split(' '))
|
||||||
|
|
||||||
|
return temp
|
||||||
|
|
|
@ -27,8 +27,6 @@ def get_vect():
|
||||||
return impDat
|
return impDat
|
||||||
|
|
||||||
|
|
||||||
# print(len(get_vect()))
|
|
||||||
|
|
||||||
|
|
||||||
# this section of the code show how to extract relevant data from the dictionaries
|
# this section of the code show how to extract relevant data from the dictionaries
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
# from newsTrain import saveTraining
|
||||||
|
from classify_news import classify_news
|
||||||
|
|
||||||
|
# saveTraining()
|
||||||
|
|
||||||
|
# Classify each sample article in turn.
for news_path in ('news_to_classify.txt', 'news2.txt'):
    classify_news(news_path)
|
|
@ -0,0 +1,382 @@
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from stopWords import stopWrdList
|
||||||
|
from retEmoDict import emoDic
|
||||||
|
from clust import clustering
|
||||||
|
|
||||||
|
def trainPre(word_array, dict):
    """Score one tokenised article against the emotion/party lexicon.

    Args:
        word_array: sequence of tokens; each is looked up as ``str(word)``.
        dict: mapping from word to label. The name shadows the builtin but
            is kept so existing callers are unaffected.

    Returns:
        list: ``[pos, neg, flag, vect]`` where

        * ``pos`` is the share of tokens labelled 'Alegría' or 'Sorpresa',
        * ``neg`` is the share labelled 'Enojo', 'Miedo', 'Repulsión' or
          'Tristeza',
        * ``flag`` is the sorted list of party markers found ('PRI' for
          'Positivo', 'CONTRA' for 'Negativo', 'NEU' for 'Neutro'), or
          ``['NEU']`` when none was found,
        * ``vect`` is the de-duplicated list of retained tokens.

        Both shares are ``0.0`` for an empty ``word_array``.
    """
    positive_emotions = ('Alegría', 'Sorpresa')
    negative_emotions = ('Enojo', 'Miedo', 'Repulsión', 'Tristeza')
    party_markers = {'Positivo': 'PRI', 'Negativo': 'CONTRA', 'Neutro': 'NEU'}
    missing = 'NA'

    pos_hits = 0
    neg_hits = 0
    parties = set()
    kept = set()

    for word in word_array:
        # One lookup per token instead of one per category.
        label = dict.get(str(word), missing)
        if label in positive_emotions:
            pos_hits += 1
        elif label in negative_emotions:
            neg_hits += 1
        elif label in party_markers:
            parties.add(party_markers[label])
        elif label != missing:
            # A label outside the known categories: the token is not
            # retained, as before.
            continue
        kept.add(word)

    tot = len(word_array)
    # Guard against an empty article, which used to divide by zero.
    pos = pos_hits / tot if tot else 0.0
    neg = neg_hits / tot if tot else 0.0

    # Sorting makes the result independent of set iteration order.
    flag = sorted(parties) if parties else ['NEU']
    vect = sorted(kept, key=str)

    return [pos, neg, flag, vect]
|
||||||
|
|
||||||
|
def corporizer():
    """Return one space-joined string of retained words per clustered article.

    Each article from ``clustering()`` is passed through ``trainPre`` with
    the lexicon from ``emoDic()``; the word list (element 3 of the result)
    is joined with single spaces.
    """
    lexicon = emoDic()
    return [' '.join(trainPre(words, lexicon)[3]) for words in clustering()]
|
||||||
|
|
||||||
|
|
||||||
|
def _party_flag(parties):
    """Map the party markers of one article to 1 (neutral), 2 (party) or 3 (opposition)."""
    # Compare as sets: the order of the markers carries no meaning.
    mentioned = frozenset(parties)
    if mentioned in (frozenset(['PRI']), frozenset(['NEU', 'PRI'])):
        return 2
    if mentioned in (frozenset(['CONTRA']), frozenset(['NEU', 'CONTRA'])):
        return 3
    # Both sides mentioned, only 'NEU', or anything else: neutral.
    return 1


def _emotion_flag(pos, neg):
    """Map positive/negative shares to 1 (equal), 2 (more positive) or 3 (more negative)."""
    if pos > neg:
        return 2
    if pos < neg:
        return 3
    return 1


def flagger():
    """Compute the emotion and party code of every clustered article.

    Each article from ``clustering()`` is scored with ``trainPre`` using the
    lexicon from ``emoDic()``.

    Returns:
        tuple: ``(index, flag)``, two lists with exactly one entry per
        article, in article order. ``index`` holds the emotion code
        (1 neutral, 2 positive, 3 negative); ``flag`` holds the party code
        (1 neutral, 2 party, 3 opposition).
    """
    emoDict = emoDic()
    scored = [trainPre(words, emoDict) for words in clustering()]

    # An expression like `x == (a or b or c)` only ever compares against
    # `a`, so most marker orderings used to match nothing and appended no
    # flag at all, shifting every later flag onto the wrong article.
    # Emitting exactly one code per article keeps both lists aligned.
    index = [_emotion_flag(item[0], item[1]) for item in scored]
    flag = [_party_flag(item[2]) for item in scored]

    return (index, flag)
|
||||||
|
|
||||||
|
|
||||||
|
def operate_on_Narray(A, B, function):
    """Apply ``function`` element-wise to two equally nested sequences.

    Args:
        A, B: scalars or (nested) iterables; paired positionally with
            ``zip`` at every nesting level.
        function: callable taking one element of ``A`` and one of ``B``.

    Returns:
        A nested list mirroring the nesting of the inputs, or
        ``function(A, B)`` directly when either argument is not iterable.
        Strings and bytes are treated as scalars.
    """
    def _is_nested(value):
        # Strings iterate into strings forever, so treat them as leaves.
        if isinstance(value, (str, bytes)):
            return False
        try:
            iter(value)
        except TypeError:
            return False
        return True

    # Decide iterability up front so that a TypeError raised by `function`
    # itself propagates instead of being mistaken for "not iterable".
    if _is_nested(A) and _is_nested(B):
        return [operate_on_Narray(a, b, function) for a, b in zip(A, B)]
    return function(A, B)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _centroid(vectors):
    """Return the component-wise mean of ``vectors`` as a list of floats ([] if empty)."""
    if not vectors:
        return []
    count = len(vectors)
    # zip(*vectors) walks the vectors column by column.
    return [float(sum(column) / count) for column in zip(*vectors)]


def trainVect():
    """Build one mean TF-IDF vector per (party, emotion) class.

    The corpus from ``corporizer()`` is vectorised with TF-IDF (at most 100
    features); each article is assigned to a class using the codes from
    ``flagger()`` and the vectors of every class are averaged.

    Returns:
        list: nine vectors (lists of floats, ``[]`` for a class with no
        articles) in this order: party/neutral, party/negative,
        party/positive, opposition/neutral, opposition/negative,
        opposition/positive, no-party/neutral, no-party/negative,
        no-party/positive.
    """
    # flagger() returns (emotion codes, party codes), one per article.
    emotion_flags, party_flags = flagger()
    corpus = corporizer()

    stop_words = stopWrdList()

    vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, max_features=100)

    vector = vectorizer.fit_transform(corpus).toarray()

    # Emotion codes: 1 neutral, 2 positive, 3 negative.
    NEUTRAL, POSITIVE, NEGATIVE = 1, 2, 3
    # Party codes: 1 no particular party, 2 party, 3 opposition.
    NO_PARTY, PARTY, OPPOSITION = 1, 2, 3

    # (party, emotion) keys in the order of the returned list.
    class_order = [
        (PARTY, NEUTRAL), (PARTY, NEGATIVE), (PARTY, POSITIVE),
        (OPPOSITION, NEUTRAL), (OPPOSITION, NEGATIVE), (OPPOSITION, POSITIVE),
        (NO_PARTY, NEUTRAL), (NO_PARTY, NEGATIVE), (NO_PARTY, POSITIVE),
    ]
    members = {key: [] for key in class_order}

    for row, (emotion, party) in enumerate(zip(emotion_flags, party_flags)):
        bucket = members.get((party, emotion))
        if bucket is not None:
            bucket.append(vector[row])

    # The previous per-class loops re-read the first vector on every pass
    # and ended up returning `first_vector / n` (and indexed past the end of
    # the vector for large classes); a real component-wise mean is used now.
    return [_centroid(members[key]) for key in class_order]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def saveTraining():
    """Train the class vectors and write them to ``trn_vect.vec``.

    Each vector returned by ``trainVect()`` is written as ``str(vector)``
    on its own line; an existing file is overwritten.
    """
    sert = trainVect()

    # The context manager flushes and closes the file; the handle used to be
    # left open, so the data was only written when the object was collected.
    with open('trn_vect.vec', 'w') as trnVect:
        trnVect.writelines(str(vec) + '\n' for vec in sert)
|
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
def emoDic():
    """Build the word -> label lexicon used for emotion and party scoring.

    ``SEL.txt`` (UTF-8, tab-separated, read from the current working
    directory) supplies the base entries: the first column is the word and
    the third column its label; the second column is ignored. A fixed set
    of political terms is then added on top, overriding any file entry
    with the same key.

    Returns:
        dict: mapping from word to label.

    Raises:
        OSError: if ``SEL.txt`` cannot be opened.
        IndexError: if a non-blank line has fewer than three columns.
    """
    emoDict = {}

    # The context manager closes the file even if a line is malformed.
    with open('SEL.txt', 'r', encoding='utf-8') as lexicon_file:
        for line in lexicon_file:
            line = line.rstrip('\n')
            # Skip blank lines instead of unconditionally dropping the last
            # line, which lost a real entry when the file had no trailing
            # newline.
            if not line:
                continue
            fields = line.split('\t')
            emoDict[fields[0]] = fields[2]

    # Political cue words: 'Positivo' marks the party, 'Negativo' the
    # opposition and 'Neutro' neutral electoral terms.
    emoDict.update({
        'PRI': 'Positivo',
        'INE': 'Neutro',
        'electoral': 'Neutro',
        'Electoral': 'Neutro',
        'PAN': 'Negativo',
        'partido': 'Neutro',
        'Partido': 'Neutro',
        'PRD': 'Negativo',
        'PT': 'Negativo',
        'PANAL': 'Negativo',
        'PVEM': 'Negativo',
        'Movimiento': 'Negativo',
        'Ciudadano': 'Negativo',
        'Alianza': 'Negativo',
        # Was misspelled 'Negtivo', a label no consumer recognises.
        'Morena': 'Negativo',
        'Encuentro': 'Negativo',
        'Social': 'Negativo',
        'Peña': 'Positivo',
        'Nieto': 'Sorpresa',
    })

    return emoDict
|
|
@ -0,0 +1,45 @@
|
||||||
|
"""
|
||||||
|
Created on Mon Apr 17 09:34:40 2017
|
||||||
|
functions to calculate the similarity measure of two real vectors
|
||||||
|
@author: nlp
|
||||||
|
"""
|
||||||
|
# The cosine measure definition
|
||||||
|
def cos_sim(vect1, vect2):
    """Return the cosine similarity of two equally long numeric vectors.

    Args:
        vect1, vect2: sequences of numbers supporting ``len()``.

    Returns:
        The dot product divided by the product of the Euclidean norms.
        ``0`` is returned when the lengths differ, and also when either
        vector has zero norm (including two empty vectors), where the
        similarity is undefined.
    """
    import math

    if len(vect1) != len(vect2):
        return 0

    dot = sum(a * b for a, b in zip(vect1, vect2))
    # Norms are computed inline so the zero case can be checked before
    # dividing.
    scale = math.sqrt(sum(a * a for a in vect1)) * math.sqrt(sum(b * b for b in vect2))
    if scale == 0:
        return 0

    return dot / scale
|
||||||
|
|
||||||
|
# Norm of vector
|
||||||
|
def norm(vect):
    """Return the Euclidean (L2) norm of ``vect``.

    Args:
        vect: any iterable of numbers (a generator works too).

    Returns:
        float: square root of the sum of squared components; ``0.0`` for
        an empty input.
    """
    import math

    return math.sqrt(sum(component * component for component in vect))
|
||||||
|
|
||||||
|
# Jaccard similarity
|
||||||
|
def jac_sim(set_A,set_B):
    """Return the Jaccard similarity ``|A & B| / |A | B|`` of two sets.

    Args:
        set_A, set_B: ``set`` or ``frozenset`` instances.

    Returns:
        float in ``[0.0, 1.0]``; ``1.0`` when both sets are empty.
        ``None`` (after printing a message) when either argument is not a
        set.
    """
    # Check BOTH arguments; `(a and b) == x` only ever tested set_B.
    if not (isinstance(set_A, (set, frozenset)) and isinstance(set_B, (set, frozenset))):
        print('One of the inputs not of type set')
        return None

    union = set_A | set_B
    if not union:
        # Two empty sets are identical; avoid 0 / 0.
        return 1.0

    # Divide by the union, not by the symmetric difference, so that the
    # value stays within [0, 1] for unequal sets as well.
    return len(set_A & set_B) / len(union)
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
|
||||||
|
|
||||||
|
def stopWrdList():
    """Return the stop words listed in ``stop.words``, one per line.

    The file is read from the current working directory; leading and
    trailing newline / carriage-return characters are stripped from each
    line.

    Raises:
        OSError: if ``stop.words`` cannot be opened.
    """
    # The context manager closes the handle, which used to be leaked.
    with open('stop.words') as sw:
        return [line.strip('\n\r') for line in sw]
|
|
@ -0,0 +1,9 @@
|
||||||
|
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.049274784954524135, 0.0, 0.052802642179678255, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14190845147296166, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.093751910664710822, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.12260657382584206, 0.0, 0.1792195920876137, 0.0, 0.0, 0.05863903838975032, 0.0, 0.0, 0.078987206235014609, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14190845147296166, 0.0]
|
||||||
|
[0.18807877008848778, 0.0, 0.0, 0.24506066350553243, 0.24506066350553243, 0.0, 0.0, 0.0, 0.0, 0.24506066350553243, 0.0, 0.0, 0.0, 0.085092264553030261, 0.0, 0.091184495307262539, 0.0, 0.0, 0.0, 0.0, 0.17839612176741074, 0.22671659111470852, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.26871028605351627, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.26871028605351627, 0.0, 0.0, 0.0, 0.0, 0.26871028605351627, 0.0, 0.22671659111470852, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.26871028605351627, 0.0, 0.24506066350553243, 0.0, 0.0, 0.0, 0.24506066350553243, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16189948656287695, 0.0, 0.0, 0.24506066350553243, 0.24506066350553243, 0.0, 0.0, 0.0, 0.0, 0.21172839263647161, 0.0, 0.0, 0.0, 0.0, 0.1547464992194269, 0.0, 0.0, 0.10126332509418294, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
||||||
|
[0.20416278128131485, 0.0, 0.18424997114557037, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.26601761921557976, 0.0, 0.0, 0.0, 0.092369135487745696, 0.26601761921557976, 0.098982358098710421, 0.0, 0.0, 0.0, 0.0, 0.19365209785612086, 0.24610480907983531, 0.0, 0.0, 0.0, 0.0, 0.29168969647011522, 0.0, 0.29168969647011522, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.29168969647011522, 0.0, 0.0, 0.29168969647011522, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17574471296862779, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22983485853585034, 0.0, 0.0, 0.0, 0.0, 0.1679800206015854, 0.0, 0.26601761921557976, 0.10992310340659664, 0.0, 0.0, 0.0, 0.24610480907983531, 0.0, 0.0, 0.0, 0.26601761921557976, 0.0, 0.0, 0.0, 0.0]
|
||||||
|
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.070657270308847969, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.026519396344811225, 0.0, 0.028418068131773094, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.083744798839379686, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.058615615346713827, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07637428499714545, 0.0, 0.0, 0.0, 0.070657270308847969, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
||||||
|
[]
|
||||||
|
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.19872849853197644, 0.0, 0.0, 0.0, 0.0, 0.031465594822711003, 0.090619042581059198, 0.033718392600347598, 0.099364249265988219, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.083835733284026395, 0.083835733284026395, 0.0, 0.099364249265988219, 0.0, 0.0, 0.0, 0.099364249265988219, 0.099364249265988219, 0.0, 0.099364249265988219, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.099364249265988219, 0.083835733284026395, 0.099364249265988219, 0.099364249265988219, 0.0, 0.0, 0.0, 0.0, 0.099364249265988219, 0.099364249265988219, 0.0, 0.099364249265988219, 0.0, 0.0, 0.0, 0.099364249265988219, 0.099364249265988219, 0.099364249265988219, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.099364249265988219, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.090619042581059198, 0.0, 0.0, 0.0, 0.078293366032245681, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.037445363271866934, 0.0, 0.0, 0.0, 0.0, 0.0, 0.090619042581059198, 0.0, 0.0, 0.0, 0.0, 0.0, 0.083835733284026395]
|
||||||
|
[0.0, 0.0, 0.035693752479062507, 0.0, 0.0, 0.0, 0.047676556390135238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.017894174084834576, 0.0, 0.019175318008478492, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.051534157624635722, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.021294809547761393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.051534157624635722, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
|
||||||
|
[]
|
||||||
|
[0.0, 0.0, 0.041165800996213739, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.020637449363162412, 0.0, 0.022114999694671268, 0.0, 0.0, 0.065170376542680042, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.059434627349076764, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.065170376542680042, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.059434627349076764, 0.0, 0.0, 0.0, 0.0, 0.0, 0.059434627349076764, 0.0, 0.0, 0.0, 0.045614793710984938, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.037530709266496383, 0.0, 0.059434627349076764, 0.024559420940951236, 0.059434627349076764, 0.0, 0.03308171655172519, 0.0, 0.054985634634305572, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.054985634634305572]
|
Loading…
Reference in New Issue