Merge pull request #4 from EddieCueto/fedora25_work

Fedora25 work
2017-10-30 17:33:54 -05:00 · 2017-10-30 17:33:54 -05:00 · 2c81e015ca
commit 2c81e015ca
parent a56b8578b3 a761f0b8dc
6 changed files with 272 additions and 17 deletions
--- a/clust.py
+++ b/clust.py
@ -0,0 +1,56 @@
+from infBack import get_vect as gv
+import numpy as np
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn import cluster
+from matplotlib import pyplot
+import numpy as np
+
+def stopWrdList():
+    """Return the stop words listed in the file ``stop.words``.
+
+    The file is opened relative to the current working directory and is
+    expected to hold one entry per line. Trailing newline and
+    carriage-return characters are removed from every entry.
+
+    Returns:
+        list: the stripped lines of ``stop.words``, in file order.
+    """
+    # The context manager closes the handle even if reading fails; the
+    # previous version left the file open for the life of the process.
+    with open('stop.words') as sw:
+        return [line.strip('\n\r') for line in sw]
+
+
+# Script body: vectorise the third column of the get_vect() rows, cluster
+# the resulting count matrix with k-means and plot the clusters.
+stop_words = stopWrdList()
+
+dataVect = gv()
+
+dataVect = np.array(dataVect)
+
+# Column 2 of every row is used as the text corpus.
+corpus = dataVect[:, 2]
+
+vectorizer = CountVectorizer(stop_words=stop_words)
+transformer = TfidfTransformer(smooth_idf=False)
+
+X = vectorizer.fit_transform(corpus)
+
+# The intermediate inputs are no longer needed once X has been built.
+del dataVect, corpus, stop_words
+
+# Dense copy of the count matrix.
+J = X.toarray()
+
+tf_idf = transformer.fit_transform(J)
+
+# NOTE(review): tf_idf_matrix is not used again in the code visible here;
+# the clustering below is fitted on the raw counts J. Confirm whether the
+# tf-idf matrix was meant to be clustered instead.
+tf_idf_matrix = tf_idf.toarray()
+
+k = 2
+kmeans = cluster.KMeans(n_clusters=k)
+kmeans.fit(J)
+
+labels = kmeans.labels_
+centroids = kmeans.cluster_centers_
+
+# NOTE(review): only columns 0 and 1 of J and of the centroids are drawn,
+# i.e. the first two columns of the count matrix - confirm this is the
+# intended 2-D view.
+for i in range(k):
+    # select only data observations with cluster label == i
+    ds = J[np.where(labels == i)]
+    # plot the data observations
+    pyplot.plot(ds[:,0],ds[:,1],'o')
+    # plot the centroids
+    lines = pyplot.plot(centroids[i, 0], centroids[i, 1], 'kx')
+    # make the centroid x's bigger
+    pyplot.setp(lines, ms=15.0)
+    pyplot.setp(lines, mew=2.0)
+pyplot.show()
+
+# Print the dense count matrix (same values as J above).
+print(X.toarray())
--- a/daemon.py
+++ b/daemon.py
@ -22,8 +22,9 @@ class Daemon(Thread):

    def run(self):
+        """Poll the UTC clock forever and capture the RSS data twice a day.
+
+        Whenever the current UTC time (``gmtime``) formatted as HH:MM:SS is
+        exactly 12:00:00 or 00:00:00, ``get_data_rss()`` is called and a
+        completion message with the finishing time is printed.
+
+        This method never returns.
+        """
+        # NOTE(review): the loop polls without sleeping, so it keeps a CPU
+        # core busy between captures - consider a short sleep per iteration.
        while True:
-            if str(strftime("%H:%M:%S", gmtime())) == '05:00:00':
+            # A membership test is required here: ('12:00:00' or '24:00:00')
+            # evaluates to just '12:00:00', and %H only yields 00-23, so
+            # midnight has to be spelled '00:00:00'.
+            if strftime("%H:%M:%S", gmtime()) in ('12:00:00', '00:00:00'):
                get_data_rss()
+                print('Data capture finished at time ' + strftime("%H:%M:%S", gmtime()))


 def main_fct():
--- a/infBack.py
+++ b/infBack.py
@ -1,18 +1,36 @@
+def get_vect():
+    """Build [title, link, summary] rows from the records in ``rss_univ.txt``.
+
+    The file content is split on the ``;`` + newline separator, the last
+    chunk is discarded, and every remaining chunk is parsed with
+    ``yaml.load``. Only the first entry (``entries[0]``) of each parsed
+    record is used.
+
+    Returns:
+        list: one ``[title, href, summary]`` list per parsed record.
+    """
+
    import yaml
-import feedparser as fp

+    # NOTE(review): this handle is never closed inside the function.
    rawDat = open('rss_univ.txt', 'r')

    strDat = rawDat.read()

    rawDat = strDat.split(';\n')
+
+    # Drop the last chunk - presumably the empty remainder that follows the
+    # final separator; verify against the writer of rss_univ.txt.
    index = len(rawDat) - 1
    rawDat.pop(index)

-strDat = yaml.load(rawDat[0])
+    strDat = []
+
+    # NOTE(review): yaml.load is called without an explicit Loader on text
+    # that appears to originate from downloaded feeds - consider
+    # yaml.safe_load if that input cannot be trusted.
+    for i in rawDat:
+        strDat.append(yaml.load(i))
+
+    del rawDat
+
+    # Keep title, first link href and summary of the first entry only.
+    impDat = []
+    for d in strDat:
+        impDat.append([d['entries'][0]['title'], d['entries'][0]['links'][0]['href'], d['entries'][0]['summary']])
+
+    del strDat
+
+    return impDat
+
+

 # this section of the code shows how to extract relevant data from the dictionaries
-print(len(rawDat))
-print(strDat['entries'][0]['title'])
-print(strDat['entries'][0]['links'][0]['href'])
-print(strDat['entries'][0]['summary'])
+"""
+print(dic['entries'][0]['title'])
+print(dic['entries'][0]['links'][0]['href'])
+print(dic['entries'][0]['summary'])
+"""
--- a/infoRet.py
+++ b/infoRet.py
@ -6,12 +6,12 @@ def get_data_rss():

    datUniver = fp.parse('http://www.eluniversal.com.mx/seccion/1/rss.xml')
    datJorn = fp.parse('http://www.jornada.unam.mx/rss/politica.xml?v=1')
-    datCnn = fp.parse('http://expansion.mx/rss/politica')
+    datAri = fp.parse('http://aristeguinoticias.com/category/mexico/feed/')

    file = open('rss_univ.txt', 'a')

-    # file.write(str(datCnn.headers['Date']) + ';\n')
-    file.write(str(datCnn) + ';\n')
+    # file.write(str(datAri.headers['Date']) + ';\n')
+    file.write(str(datAri) + ';\n')
    # file.write(str(datUniver.headers['Date']) + ';\n')
    file.write(str(datUniver) + ';\n')
    # file.write(str(datJorn.headers['Date']) + ';\n')
@ -19,7 +19,6 @@ def get_data_rss():

    file.close()

-
 #  SOME COMMANDS OF FEEDPARSER

 #  print(datUniver['feed']['link'] + '\n')
--- a/stop.words
+++ b/stop.words
@ -0,0 +1,178 @@
+un
+una
+unas
+unos
+uno
+sobre
+todo
+también
+tras
+otro
+algún
+alguno
+alguna
+algunos
+algunas
+ser
+es
+soy
+eres
+somos
+sois
+estoy
+esta
+estamos
+estais
+estan
+como
+en
+para
+atras
+porque
+por qué
+estado
+estaba
+ante
+antes
+siendo
+ambos
+pero
+por
+poder
+puede
+puedo
+podemos
+podeis
+pueden
+fui
+fue
+fuimos
+fueron
+hacer
+hago
+hace
+hacemos
+haceis
+hacen
+cada
+fin
+incluso
+primero
+desde
+conseguir
+consigo
+consigue
+consigues
+conseguimos
+consiguen
+ir
+voy
+va
+vamos
+vais
+van
+vaya
+gueno
+ha
+tener
+tengo
+tiene
+tenemos
+teneis
+tienen
+el
+la
+lo
+las
+los
+su
+aqui
+mio
+tuyo
+ellos
+ellas
+nos
+nosotros
+vosotros
+vosotras
+si
+dentro
+solo
+solamente
+saber
+sabes
+sabe
+sabemos
+sabeis
+saben
+ultimo
+largo
+bastante
+haces
+muchos
+aquellos
+aquellas
+sus
+entonces
+tiempo
+verdad
+verdadero
+verdadera
+cierto
+ciertos
+cierta
+ciertas
+intentar
+intento
+intenta
+intentas
+intentamos
+intentais
+intentan
+dos
+bajo
+arriba
+encima
+usar
+uso
+usas
+usa
+usamos
+usais
+usan
+emplear
+empleo
+empleas
+emplean
+empleamos
+empleais
+valor
+muy
+era
+eras
+eramos
+eran
+modo
+bien
+cual
+cuando
+donde
+mientras
+quien
+con
+entre
+sin
+trabajo
+trabajar
+trabajas
+trabaja
+trabajamos
+trabajais
+trabajan
+podria
+podrias
+podriamos
+podrian
+podriais
+yo
+aquel
--- a/time_test.py
+++ b/time_test.py
@ -0,0 +1,3 @@
+from time import gmtime, strftime
+
+# Print the current UTC time (gmtime) formatted as HH:MM:SS.
+print(strftime("%H:%M:%S", gmtime()))