Commit 464fcdb1 authored by Alberto Pascual
Browse files

facebook scraper fixes

parent fa4ad0a3
Pipeline #431 passed with stages in 32 seconds
......@@ -15,7 +15,7 @@ def main(args):
for argument in args[1:]:
identifier = time.time()
command = 'python -m luigi --module analysistask PipelineTask --index somedi --doc-type news --url {url} --id "{id}" --analysisType "sentiments,emotions" --num {num}'.format(url=str(argument),id=identifier, num=int(args[0]))
subprocess.Popen(command.split(), shell= False)
subprocess.call(command.split(), shell= False)
def cron(arg):
......
......@@ -574,7 +574,7 @@ Data can be provided in one of three ways:
return [key, pp, psoe, podemos, ciudadanos];
});
console.log(data)
}
else {
......
......@@ -2,7 +2,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: somedi-config
name: ${NAME}-crawler
data:
ES_ENDPOINT: "$ES_ENDPOINT"
ES_PORT: "$ES_PORT"
......@@ -17,14 +17,14 @@ data:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: ${NAME}
name: ${NAME}-crawler
spec:
replicas: 1
template:
metadata:
labels:
role: somedi-luigi
app: ${NAME}
role: ${NAME}-luigi
app: ${NAME}-luigi
spec:
imagePullSecrets:
- name: registry.cluster.gsi.dit.upm.es
......@@ -41,7 +41,7 @@ spec:
containerPort: 8082
envFrom:
- configMapRef:
name: somedi-config
name: ${NAME}-crawler
---
apiVersion: v1
kind: ConfigMap
......@@ -54,14 +54,14 @@ data:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: ${NAME}-deploy
name: ${NAME}-web
spec:
replicas: 1
template:
metadata:
labels:
role: ${NAME}-web
app: ${NAME}
app: ${NAME}-web
spec:
imagePullSecrets:
- name: registry.cluster.gsi.dit.upm.es
......@@ -78,5 +78,5 @@ spec:
containerPort: 8090
envFrom:
- configMapRef:
name: ${NAME}-config
name: ${NAME}-web
......@@ -2,7 +2,7 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: ${NAME}
name: ${NAME}-crawler
annotations:
ingress.kubernetes.io/rewrite-target: /
spec:
......@@ -12,13 +12,13 @@ spec:
paths:
- path: /
backend:
serviceName: ${NAME}
serviceName: ${NAME}-crawler
servicePort: 8082
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: ${NAME}
name: ${NAME}-web
annotations:
ingress.kubernetes.io/rewrite-target: /
spec:
......@@ -28,6 +28,6 @@ spec:
paths:
- path: /
backend:
serviceName: ${NAME}
servicePort: 8080
serviceName: ${NAME}-web
servicePort: 8090
......@@ -2,7 +2,7 @@
apiVersion: v1
kind: Service
metadata:
name: ${NAME}
name: ${NAME}-crawler
spec:
type: ClusterIP
ports:
......@@ -10,16 +10,16 @@ spec:
port: 8082
protocol: TCP
selector:
role: somedi-luigi
role: ${NAME}-crawler
---
apiVersion: v1
kind: Service
metadata:
name: $NAME
name: ${NAME}-web
spec:
type: ClusterIP
ports:
- port: 8080
- port: 8090
protocol: TCP
selector:
role: ${NAME}-web
......
No preview for this file type
......@@ -46,6 +46,8 @@ def request_until_succeed(url):
# - shares&limit= : extrae el numero de veces que se ha compartido la noticia
def getFBPageFeedData (page_id,num_status,filepath):
page_idbak = page_id
if page_id == "podemos": page_id = "ahorapodemos"
if page_id == "ciudadanos": page_id = "Cs.Ciudadanos"
......@@ -71,8 +73,8 @@ def getFBPageFeedData (page_id,num_status,filepath):
aux["schema:datePublished"] = post["created_time"]
aux["schema:articleBody"] = post["message"]
aux["schema:author"] = 'facebook'
aux["schema:creator"] = page_id
aux["schema:search"] = page_id
aux["schema:creator"] = page_idbak
aux["schema:search"] = page_idbak
aux['comments'] = post['comments']
json.dump(aux, outfile)
outfile.write('\n')
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment