This commit is contained in:
elem_work 2017-11-27 19:14:38 -06:00
parent 71715a021c
commit 5aef60aa91
7 changed files with 2211 additions and 25 deletions

2036
SEL.txt Normal file

File diff suppressed because it is too large Load Diff

48
clust.py Normal file → Executable file
View File

@ -1,45 +1,49 @@
from infBack import get_vect as gv from infBack import get_vect as gv
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction.text import TfidfVectorizer
from stopWords import stopWrdList
import numpy as np import numpy as np
def stopWrdList(): def clustering():
sw = open('stop.words')
prue = []
prue.append(sw.readlines())
return [l.strip('\n\r') for l in prue[0]]
# These are the relevant news cue words
voc = ["ine", "pri", "pan", "prd", "pt", "pvem", "verde", "movimiento", "ciudadano", "panal", "alianza", "morena", "partido", "encuentro", "social", "electoral"]
voc = ["ine", "pri", "pan", "prd", "pt", "pvem", "verde", "movimiento", "ciudadano", "panal", "alianza", "morena", "partido", "encuentro", "social", "electoral"] stop_words = stopWrdList()
stop_words = stopWrdList() dataVect = gv()
dataVect = gv() dataVect = np.array(dataVect)
dataVect = np.array(dataVect) corpus = dataVect[:, 2]
corpus = dataVect[:, 2] vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, vocabulary=voc)
vectorizer = TfidfVectorizer(strip_accents='ascii', analyzer='word', stop_words=stop_words, vocabulary=voc) X = vectorizer.fit_transform(corpus)
X = vectorizer.fit_transform(corpus) del dataVect, stop_words, vectorizer # , corpus
del dataVect, stop_words, vectorizer # , corpus J = X.toarray()
J = X.toarray() # The indexes are extracted to obtain only the relevant news from the general corpus
# print(J) index = []
index = [] for x in range(0, len(J)):
for x in range(0, len(J)):
if sum(J[x]) != 0: if sum(J[x]) != 0:
index.append(x) index.append(x)
index = tuple(index) index = tuple(index)
electCorp = [corpus[x] for x in index] electCorp = [corpus[x] for x in index]
del corpus del corpus
print(electCorp) # This section of the code processes the political party news in order to give an emotional classification
temp = []
for i in electCorp:
temp.append(i.split(' '))
return temp

View File

@ -27,8 +27,6 @@ def get_vect():
return impDat return impDat
# print(len(get_vect()))
# this section of the code shows how to extract relevant data from the dictionaries # this section of the code shows how to extract relevant data from the dictionaries
""" """

19
main.py Normal file
View File

@ -0,0 +1,19 @@
from retEmoDict import emoDic
from newsTrain import classifyNews
from clust import clustering
# Articles selected by clustering(), each one already split into words.
temp = clustering()
# Word -> category lexicon that every article is scored against.
emoDict = emoDic()
# Classify all articles first, then print one result per line.
rest = [classifyNews(i, emoDict) for i in temp]
for i in rest:
    print(i)

77
newsTrain.py Normal file
View File

@ -0,0 +1,77 @@
def classifyNews(word_array, dict):
    """Score a tokenised news item against a word -> category lexicon.

    Every word is looked up (as ``str(word)``) in ``dict``; words that are
    missing are treated as category ``'NA'``.  Emotion categories are counted,
    stance categories are turned into tags, and words whose category is not
    one of the recognised labels are ignored.

    Args:
        word_array: Sized iterable with the words of one news item.
        dict: Mapping from word to category label.  The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        A four-element list:
        ``[('Positive:', pos), ('Negative:', neg), flag, vect]`` where

        * ``pos`` is (Alegría + Sorpresa hits) / number of words,
        * ``neg`` is (Enojo + Miedo + Repulsión + Tristeza hits) / number of
          words,
        * ``flag`` lists the distinct stance tags found (``'PRI'`` for
          'Positivo', ``'CONTRA'`` for 'Negativo', ``'NEU'`` for 'Neutro'),
          or ``['NEU']`` when none was found,
        * ``vect`` lists the distinct words with a recognised (or missing)
          category.

        ``flag`` and ``vect`` are in first-seen order, so the result is
        reproducible between runs.  For an empty ``word_array`` both ratios
        are ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    default = 'NA'
    positive_emotions = ('Alegría', 'Sorpresa')
    negative_emotions = ('Enojo', 'Miedo', 'Repulsión', 'Tristeza')

    pos_hits = 0
    neg_hits = 0
    flag = []
    vect = []
    seen = set()
    for word in word_array:
        # A single lookup per word is enough: the labels are mutually
        # exclusive.
        label = dict.get(str(word), default)
        tag = None
        if label in positive_emotions:
            pos_hits += 1
        elif label in negative_emotions:
            neg_hits += 1
        elif label == 'Positivo':
            tag = 'PRI'
        elif label == 'Negativo':
            tag = 'CONTRA'
        elif label == 'Neutro':
            tag = 'NEU'
        elif label != default:
            # Unrecognised category: the word contributes nothing.
            continue

        if tag is not None and tag not in flag:
            flag.append(tag)
        if word not in seen:
            seen.add(word)
            vect.append(word)

    tot = len(word_array)
    if tot:
        pos = pos_hits / tot
        neg = neg_hits / tot
    else:
        # Nothing to score; avoid dividing by zero.
        pos = 0.0
        neg = 0.0

    if not flag:
        flag = ['NEU']
    return [('Positive:', pos), ('Negative:', neg), flag, vect]

45
retEmoDict.py Normal file
View File

@ -0,0 +1,45 @@
def emoDic(path='SEL.txt'):
    """Build the word -> category dictionary used to classify news.

    The lexicon file is read as UTF-8, one entry per line with tab-separated
    fields.  The first field becomes the key and the third field the value;
    the second field is ignored.  Lines with fewer than three fields (blank
    lines, the empty string after a trailing newline) are skipped, so the
    last entry is kept whether or not the file ends with a newline.

    A fixed set of hand-assigned labels for political cue words is then
    added, overriding any lexicon entry with the same key.

    Args:
        path: Location of the lexicon file.  Defaults to ``'SEL.txt'`` in the
            current working directory.

    Returns:
        dict mapping each word to its category label.

    Raises:
        OSError: If the lexicon file cannot be opened.
    """
    emoDict = {}
    # The context manager guarantees the file is closed even on error.
    with open(path, 'r', encoding='utf-8') as lexicon:
        for line in lexicon.read().split('\n'):
            fields = line.split('\t')
            if len(fields) < 3:
                continue
            emoDict[fields[0]] = fields[2]

    # Hand-assigned stance labels for the political cue words.
    emoDict.update({
        'PRI': 'Positivo',
        'INE': 'Neutro',
        'electoral': 'Neutro',
        'Electoral': 'Neutro',
        'PAN': 'Negativo',
        'partido': 'Neutro',
        'Partido': 'Neutro',
        'PRD': 'Negativo',
        'PT': 'Negativo',
        'PANAL': 'Negativo',
        'PVEM': 'Negativo',
        'Movimiento': 'Negativo',
        'Ciudadano': 'Negativo',
        'Alianza': 'Negativo',
        # Was misspelled 'Negtivo', which no classifier label matches.
        'Morena': 'Negativo',
        'Encuentro': 'Negativo',
        'Social': 'Negativo',
        'Peña': 'Positivo',
        # Only one label per word is supported; ['Sorpresa', 'Positivo']
        # was considered in the original.
        'Nieto': 'Sorpresa',
    })
    return emoDict

7
stopWords.py Normal file
View File

@ -0,0 +1,7 @@
def stopWrdList():
    """Return the stop words stored in the file ``stop.words``.

    The file is read from the current working directory, one word per line,
    using the platform's default text encoding.  Trailing newline and
    carriage-return characters are removed from every line; blank lines are
    kept as empty strings.

    Returns:
        list of str, in file order.

    Raises:
        OSError: If ``stop.words`` cannot be opened.
    """
    # The context manager closes the file, which the original never did.
    with open('stop.words') as sw:
        return [line.strip('\n\r') for line in sw]