Merge pull request #4 from EddieCueto/fedora25_work

Fedora25 work
2017-10-30 17:33:54 -05:00 · 2017-10-30 17:33:54 -05:00 · 2c81e015ca
commit 2c81e015ca
parent a56b8578b3 a761f0b8dc
6 changed files with 272 additions and 17 deletions
--- a/clust.py
+++ b/clust.py
@ -0,0 +1,56 @@
 from infBack import get_vect as gv
 import numpy as np
 from sklearn.feature_extraction.text import TfidfTransformer
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn import cluster
 from matplotlib import pyplot
 import numpy as np
 def stopWrdList():
    """Return the stop words listed in the ``stop.words`` file.

    The file is opened relative to the current working directory and is
    read line by line.  Line-terminator characters (``\\n`` and ``\\r``)
    are stripped from each entry; any other whitespace is preserved.

    Returns:
        list[str]: one item per line of the file, in file order.

    Raises:
        OSError: if ``stop.words`` cannot be opened.
    """
    # Use a context manager so the file handle is closed even if reading fails.
    with open('stop.words') as sw:
        return [line.strip('\n\r') for line in sw]
 # Script body: vectorise the collected feed texts, cluster them with k-means
 # and plot the resulting clusters.
 stop_words = stopWrdList()
 # gv() is infBack.get_vect, which returns rows of [title, link, summary].
 dataVect = gv()
 dataVect = np.array(dataVect)
 # Column 2 holds the summary text of each row; it is used as the corpus.
 corpus = dataVect[:, 2]
 vectorizer = CountVectorizer(stop_words=stop_words)
 transformer = TfidfTransformer(smooth_idf=False)
 # X is the sparse term-count matrix (one row per document).
 X = vectorizer.fit_transform(corpus)
 # Drop the intermediate inputs once the count matrix has been built.
 del dataVect, corpus, stop_words
 # Dense copy of the term counts.
 J = X.toarray()
 tf_idf = transformer.fit_transform(J)
 tf_idf_matrix = tf_idf.toarray()
 # NOTE(review): tf_idf_matrix is computed above but never used below; k-means
 # is fitted on the raw counts J. Confirm whether the TF-IDF weights were
 # meant to be clustered instead.
 k = 2
 kmeans = cluster.KMeans(n_clusters=k)
 kmeans.fit(J)
 labels = kmeans.labels_
 centroids = kmeans.cluster_centers_
 # Only the first two feature columns of J (and of each centroid) are plotted.
 for i in range(k):
    # select only data observations with cluster label == i
    ds = J[np.where(labels == i)]
    # plot the data observations
    pyplot.plot(ds[:,0],ds[:,1],'o')
    # plot the centroids
    lines = pyplot.plot(centroids[i, 0], centroids[i, 1], 'kx')
    # make the centroid x's bigger (ms = marker size, mew = marker edge width)
    pyplot.setp(lines, ms=15.0)
    pyplot.setp(lines, mew=2.0)
 pyplot.show()
 # Dump the dense term-count matrix to stdout.
 print(X.toarray())
--- a/daemon.py
+++ b/daemon.py
@ -22,8 +22,9 @@ class Daemon(Thread):
    def run(self):
        while True:
-            if str(strftime("%H:%M:%S", gmtime())) == '05:00:00':
+            if str(strftime("%H:%M:%S", gmtime())) == ('12:00:00' or '24:00:00'):
                get_data_rss()
                print('Data capture finished at time' + str(strftime("%H:%M:%S", gmtime())))
 def main_fct():
--- a/infBack.py
+++ b/infBack.py
@ -1,18 +1,36 @@
 def get_vect():
    # Read the feed dumps stored in 'rss_univ.txt' and return, for each dump,
    # a list [title, link href, summary] taken from its first entry.
    import yaml
 import feedparser as fp
    # NOTE(review): this file handle is rebound below without being closed.
    rawDat = open('rss_univ.txt', 'r')
    strDat = rawDat.read()
    # Records are separated by ';\n' (get_data_rss writes str(feed) + ';\n').
    rawDat = strDat.split(';\n')
    # Discard the last piece produced by the split, i.e. whatever follows the
    # final ';\n' separator.
    index = len(rawDat) - 1
    rawDat.pop(index)
-strDat = yaml.load(rawDat[0])
+    strDat = []
    # NOTE(review): yaml.load is applied to text read from a file without an
    # explicit Loader; confirm the file contents are trusted.
    for i in rawDat:
        strDat.append(yaml.load(i))
    del rawDat
    impDat = []
    # Only entries[0] of each parsed dump is used.
    # NOTE(review): presumably every dump has at least one entry; an empty
    # 'entries' list would raise IndexError here - verify.
    for d in strDat:
        impDat.append([d['entries'][0]['title'], d['entries'][0]['links'][0]['href'], d['entries'][0]['summary']])
    del strDat
    return impDat
 # This section of the code shows how to extract relevant data from the dictionaries
-print(len(rawDat))
+"""
-print(strDat['entries'][0]['title'])
+print(dic['entries'][0]['title'])
-print(strDat['entries'][0]['links'][0]['href'])
+print(dic['entries'][0]['links'][0]['href'])
-print(strDat['entries'][0]['summary'])
+print(dic['entries'][0]['summary'])
 """
--- a/infoRet.py
+++ b/infoRet.py
@ -6,12 +6,12 @@ def get_data_rss():
    datUniver = fp.parse('http://www.eluniversal.com.mx/seccion/1/rss.xml')
    datJorn = fp.parse('http://www.jornada.unam.mx/rss/politica.xml?v=1')
-    datCnn = fp.parse('http://expansion.mx/rss/politica')
+    datAri = fp.parse('http://aristeguinoticias.com/category/mexico/feed/')
    file = open('rss_univ.txt', 'a')
-    # file.write(str(datCnn.headers['Date']) + ';\n')
+    # file.write(str(datAri.headers['Date']) + ';\n')
-    file.write(str(datCnn) + ';\n')
+    file.write(str(datAri) + ';\n')
    # file.write(str(datUniver.headers['Date']) + ';\n')
    file.write(str(datUniver) + ';\n')
    # file.write(str(datJorn.headers['Date']) + ';\n')
@ -19,7 +19,6 @@ def get_data_rss():
    file.close()
 #  SOME COMMANDS OF FEEDPARSER
 #  print(datUniver['feed']['link'] + '\n')
--- a/stop.words
+++ b/stop.words
@ -0,0 +1,178 @@
 un
 una
 unas
 unos
 uno
 sobre
 todo
 también
 tras
 otro
 algún
 alguno
 alguna
 algunos
 algunas
 ser
 es
 soy
 eres
 somos
 sois
 estoy
 esta
 estamos
 estais
 estan
 como
 en
 para
 atras
 porque
 por qué
 estado
 estaba
 ante
 antes
 siendo
 ambos
 pero
 por
 poder
 puede
 puedo
 podemos
 podeis
 pueden
 fui
 fue
 fuimos
 fueron
 hacer
 hago
 hace
 hacemos
 haceis
 hacen
 cada
 fin
 incluso
 primero
 desde
 conseguir
 consigo
 consigue
 consigues
 conseguimos
 consiguen
 ir
 voy
 va
 vamos
 vais
 van
 vaya
 bueno
 ha
 tener
 tengo
 tiene
 tenemos
 teneis
 tienen
 el
 la
 lo
 las
 los
 su
 aqui
 mio
 tuyo
 ellos
 ellas
 nos
 nosotros
 vosotros
 vosotras
 si
 dentro
 solo
 solamente
 saber
 sabes
 sabe
 sabemos
 sabeis
 saben
 ultimo
 largo
 bastante
 haces
 muchos
 aquellos
 aquellas
 sus
 entonces
 tiempo
 verdad
 verdadero
 verdadera
 cierto
 ciertos
 cierta
 ciertas
 intentar
 intento
 intenta
 intentas
 intentamos
 intentais
 intentan
 dos
 bajo
 arriba
 encima
 usar
 uso
 usas
 usa
 usamos
 usais
 usan
 emplear
 empleo
 empleas
 emplean
 empleamos
 empleais
 valor
 muy
 era
 eras
 eramos
 eran
 modo
 bien
 cual
 cuando
 donde
 mientras
 quien
 con
 entre
 sin
 trabajo
 trabajar
 trabajas
 trabaja
 trabajamos
 trabajais
 trabajan
 podria
 podrias
 podriamos
 podrian
 podriais
 yo
 aquel
--- a/time_test.py
+++ b/time_test.py
@ -0,0 +1,3 @@
 from time import gmtime, strftime
 # Print the current UTC time formatted as HH:MM:SS.
 print(strftime("%H:%M:%S", gmtime()))
		`@ -0,0 +1,3 @@`
							`from time import gmtime, strftime`

							`print(strftime("%H:%M:%S", gmtime()))`