Commit c8990c9b183fc898c0e25a944f6493aca2329aa8

Authored by Diego Ceccarelli
1 parent c72eba05

add resiltech collector

blacklist.txt
... ... @@ -232,4 +232,11 @@ suo
232 232 dal
233 233 piu
234 234 sta
235   -non
236 235 \ No newline at end of file
  236 +non
  237 +alle
  238 +già
  239 +tutta
  240 +porca
  241 +sesso
  242 +troia
  243 +cazzo
... ...
logback.xml
... ... @@ -22,8 +22,9 @@
22 22  
23 23  
24 24 <logger name="it.cnr.isti.hpc.property.ProjectProperties" level="ERROR" />
25   -
26   -
  25 +
  26 + <logger name="twitter4j.TwitterStreamImpl" level="ERROR" />
  27 +
27 28  
28 29  
29 30  
... ...
... ... @@ -36,6 +36,36 @@
36 36 </dependency>
37 37  
38 38  
  39 + <!-- rest dependencies -->
  40 +
  41 + <dependency>
  42 + <groupId>com.sun.jersey</groupId>
  43 + <artifactId>jersey-servlet</artifactId>
  44 + <version>1.17</version>
  45 + </dependency>
  46 + <dependency>
  47 + <groupId>com.sun.jersey.contribs</groupId>
  48 + <artifactId>jersey-multipart</artifactId>
  49 + <version>1.17</version>
  50 + </dependency>
  51 + <dependency>
  52 + <groupId>com.sun.jersey</groupId>
  53 + <artifactId>jersey-json</artifactId>
  54 + <version>1.17</version>
  55 + </dependency>
  56 + <dependency>
  57 + <groupId>com.sun.jersey</groupId>
  58 + <artifactId>jersey-bundle</artifactId>
  59 + <version>1.17</version>
  60 + </dependency>
  61 +
  62 + <dependency>
  63 + <groupId>org.jvnet</groupId>
  64 + <artifactId>mimepull</artifactId>
  65 + <version>1.6</version>
  66 + </dependency>
  67 +
  68 +
39 69 </dependencies>
40 70 <build>
41 71 <plugins>
... ...
project.properties
... ... @@ -8,4 +8,18 @@ category.threshold=0.2f
8 8 keyword.to.category=src/main/resources/keywords/keyword-to-category-2.tsv
9 9 hpc.rest.service=http://localhost:8080/secure/rest
10 10 trends.twitter.core=./resources/trends/trends-new-5.json
11   -resiltec.trend.service=yonose
  11 +resiltec.trend.service=http://office.resiltech.com:8084/eventService/webresources/TEP
  12 +
  13 +
  14 +oauth.consumerKey=EKfIG6HHvN5CdnZHNmqDVQ
  15 +oauth.consumerSecret=xN8yjOvh0s77Ywbcw8eTPVe0H2unWIOJhGQa314A
  16 +oauth.accessToken=227458257-v3rknWtpf1bMzgzZnEhgpcdvfTtGFVyzT0SHAQ84
  17 +oauth.accessTokenSecret=4OQZmCPUAMYZ78sONqRN4mpE2fpAuU0S7ChdLC4U
  18 +
  19 +min.freq=50
  20 +
  21 +#bucket interval in minutes
  22 +bucket.interval=30
  23 +
  24 +
  25 +
... ...
scripts/config.sh 100644 → 100755
... ... @@ -6,7 +6,6 @@ LOG=INFO
6 6 ##LOG=DEBUG
7 7 LOGAT=1000
8 8 E_BADARGS=65
9   -JAVA="java $XMX -Dlogat=$LOGAT -Dlog=$LOG -cp .:./target/FIXME"
10   -CLI=it.cnr.isti.hpc.FIXME
11   -
  9 +JAVA="java $XMX -Dlogat=$LOGAT -Dlogback.configurationFile=./logback.xml -cp .:./target/twitter-trends-0.0.1-SNAPSHOT-jar-with-dependencies.jar"
  10 +CLI=it.cnr.isti.hpc.trends.cli
12 11 export LC_ALL=C
... ...
scripts/example.sh renamed to scripts/trends.sh 100644 → 100755
1 1 #!/usr/bin/env bash
2 2 source ./scripts/config.sh
3 3  
4   -EXPECTED_ARGS=2
  4 +EXPECTED_ARGS=0
5 5  
6 6 if [ $# -ne $EXPECTED_ARGS ]
7 7 then
8   - echo "Usage: `basename $0` input1 input2"
  8 + echo "Usage: `basename $0`"
9 9 exit $E_BADARGS
10 10 fi
11 11  
12   -echo "hello world param $1, param $2"
  12 +echo "computing trends"
  13 +$JAVA $CLI.ResiltechTrendsCLI
... ...
src/main/java/it/cnr/isti/hpc/trends/Keywords.java
... ... @@ -15,6 +15,10 @@
15 15 */
16 16 package it.cnr.isti.hpc.trends;
17 17  
  18 +import it.cnr.isti.hpc.io.IOUtils;
  19 +
  20 +import java.util.List;
  21 +
18 22 /**
19 23 * @author Diego Ceccarelli <diego.ceccarelli@isti.cnr.it>
20 24 *
... ... @@ -23,8 +27,9 @@ package it.cnr.isti.hpc.trends;
23 27 public class Keywords {
24 28  
25 29 public String[] getKeywords() {
  30 + List<String> keywords = IOUtils
  31 + .getLines("./twitter-filter-keywords.txt");
  32 + return keywords.toArray(new String[keywords.size()]);
26 33  
27   - return "pisa,pontedera,attacco,ultras,casapound,terrorismo,elezioni,presidente,renzi"
28   - .split(",");
29 34 }
30 35 }
... ...
src/main/java/it/cnr/isti/hpc/trends/cli/ResiltechTrendsCLI.java 0 → 100644
  1 +/**
  2 + * Copyright 2015 Diego Ceccarelli
  3 + *
  4 + * Licensed under the Apache License, Version 2.0 (the "License");
  5 + * you may not use this file except in compliance with the License.
  6 + * You may obtain a copy of the License at
  7 + *
  8 + * http://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS,
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 + * See the License for the specific language governing permissions and
  14 + * limitations under the License.
  15 + */
  16 +package it.cnr.isti.hpc.trends.cli;
  17 +
  18 +import it.cnr.isti.hpc.cli.AbstractCommandLineInterface;
  19 +import it.cnr.isti.hpc.property.ProjectProperties;
  20 +import it.cnr.isti.hpc.trends.TrendDetector;
  21 +import it.cnr.isti.hpc.trends.listener.TwitterAPIListener;
  22 +import it.cnr.isti.hpc.trends.output.FileTrendCollector;
  23 +import it.cnr.isti.hpc.trends.output.ResiltechCollector;
  24 +
  25 +import javax.activity.InvalidActivityException;
  26 +
  27 +import org.slf4j.Logger;
  28 +import org.slf4j.LoggerFactory;
  29 +
  30 +/**
  31 + * @author Diego Ceccarelli <diego.ceccarelli@isti.cnr.it>
  32 + *
  33 + * Created on Mar 4, 2015
  34 + */
  35 +public class ResiltechTrendsCLI extends AbstractCommandLineInterface {
  36 +
  37 + static ProjectProperties properties = new ProjectProperties(
  38 + ResiltechTrendsCLI.class);
  39 +
  40 + private static final Logger logger = LoggerFactory
  41 + .getLogger(ResiltechTrendsCLI.class);
  42 +
  43 + public ResiltechTrendsCLI(String[] args) {
  44 + super(args, new String[] {});
  45 + }
  46 +
  47 + public static void main(String[] args) throws InvalidActivityException {
  48 + int minFreq = properties.getInt("min.freq");
  49 + int bucketInterval = properties.getInt("bucket.interval");
  50 + logger.info("term minimum frequency = {}", minFreq);
  51 + logger.info("trend detected every {} minutes", bucketInterval);
  52 +
  53 + TrendDetector td = new TrendDetector().setMinFreq(minFreq)
  54 + .setBucketInterval(bucketInterval);
  55 + td.addCollector(new FileTrendCollector("resiltech-trends.tsv")
  56 + .asPlain());
  57 + td.addCollector(new ResiltechCollector());
  58 + TwitterAPIListener listener = new TwitterAPIListener(td);
  59 + listener.run();
  60 + }
  61 +}
... ...
src/main/java/it/cnr/isti/hpc/trends/listener/TwitterAPIListener.java
... ... @@ -101,7 +101,7 @@ public class TwitterAPIListener extends AbstractTwitterListener {
101 101  
102 102 public static void main(String[] args) throws InvalidActivityException {
103 103 TwitterAPIListener listener = new TwitterAPIListener(
104   - new TrendDetector().setMinFreq(15).setBucketInterval(30));
  104 + new TrendDetector().setMinFreq(1).setBucketInterval(1));
105 105  
106 106 listener.run();
107 107  
... ...
src/main/java/it/cnr/isti/hpc/trends/output/ResiltechCollector.java 0 → 100644
  1 +/**
  2 + * Copyright 2015 Diego Ceccarelli
  3 + *
  4 + * Licensed under the Apache License, Version 2.0 (the "License");
  5 + * you may not use this file except in compliance with the License.
  6 + * You may obtain a copy of the License at
  7 + *
  8 + * http://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS,
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 + * See the License for the specific language governing permissions and
  14 + * limitations under the License.
  15 + */
  16 +package it.cnr.isti.hpc.trends.output;
  17 +
  18 +import it.cnr.isti.hpc.property.ProjectProperties;
  19 +import it.cnr.isti.hpc.trends.output.FileTrendCollector.StatusSerializer;
  20 +import it.cnr.isti.hpc.trends.util.StatusJSONImpl;
  21 +
  22 +import java.io.IOException;
  23 +
  24 +import org.slf4j.Logger;
  25 +import org.slf4j.LoggerFactory;
  26 +
  27 +import com.google.gson.Gson;
  28 +import com.google.gson.GsonBuilder;
  29 +import com.sun.jersey.api.client.Client;
  30 +import com.sun.jersey.api.client.ClientResponse;
  31 +import com.sun.jersey.api.client.WebResource;
  32 +
  33 +/**
  34 + * @author Diego Ceccarelli <diego.ceccarelli@isti.cnr.it>
  35 + *
  36 + * Created on Feb 7, 2015
  37 + */
  38 +public class ResiltechCollector extends TrendCollector {
  39 +
  40 + private static final Logger logger = LoggerFactory
  41 + .getLogger(ResiltechCollector.class);
  42 + private static ProjectProperties properties = new ProjectProperties(
  43 + ResiltechCollector.class);
  44 + private static final String RESILTECH_API = properties
  45 + .get("resiltec.trend.service");
  46 +
  47 + private static Gson gson = new GsonBuilder().registerTypeAdapter(
  48 + StatusJSONImpl.class, new StatusSerializer()).create();
  49 +
  50 + Client client = Client.create();
  51 + WebResource webResource = client.resource(RESILTECH_API);
  52 +
  53 + public ResiltechCollector() {
  54 +
  55 + }
  56 +
  57 + @Override
  58 + public void add(Trend t) throws IOException {
  59 + try {
  60 +
  61 + // Form form = new Form();
  62 + //
  63 + // form.add("type", "text/json");
  64 + // form.add("resource-block", gson.toJson(trend));
  65 +
  66 + // POST method
  67 + ClientResponse response = webResource.type("text/json").post(
  68 + ClientResponse.class, gson.toJson(t));
  69 +
  70 + // check response status code
  71 + if (response.getStatus() != 200) {
  72 + logger.error("error {} {}", response.getStatus(),
  73 + response.getMetadata());
  74 +
  75 + }
  76 + logger.info("trend {} sent", t.getDescription());
  77 +
  78 + } catch (Exception e) {
  79 + e.printStackTrace();
  80 + }
  81 +
  82 + }
  83 +
  84 + @Override
  85 + public void close() throws IOException {
  86 +
  87 + }
  88 +
  89 +}
... ...
twitter-filter-keywords.txt 0 → 100644
  1 +'ndrangheta
  2 +aereoporto
  3 +anarchia
  4 +anarchici
  5 +andrangheta
  6 +antivirus
  7 +armato
  8 +assassino
  9 +attacco
  10 +attentato
  11 +barbone
  12 +bomba
  13 +bombe
  14 +bossi
  15 +camorra
  16 +carabinieri
  17 +casapound
  18 +coltello
  19 +contrabbando
  20 +corteo
  21 +criminali
  22 +digos
  23 +disastro
  24 +drogato
  25 +elezioni
  26 +eoliche
  27 +esercitazione
  28 +esercito
  29 +esplosione
  30 +famiglia
  31 +force
  32 +furto
  33 +gang
  34 +hamas
  35 +incendio
  36 +incidente
  37 +influenza
  38 +ira
  39 +isil
  40 +isis
  41 +islamico
  42 +israele
  43 +italia
  44 +jihad
  45 +mafia
  46 +manifestazione
  47 +militari
  48 +minaccia
  49 +mobbing
  50 +ostaggio
  51 +pale
  52 +palestina
  53 +pericolo
  54 +piccolo
  55 +pirati
  56 +pisa
  57 +pistola
  58 +polizia
  59 +pontedera
  60 +presidente
  61 +pubblico
  62 +pugno
  63 +qaeda
  64 +rapimento
  65 +renzi
  66 +ris
  67 +rivolta
  68 +rivoluzione
  69 +salvini
  70 +sangue
  71 +shabaab
  72 +somali
  73 +somalia
  74 +sospetto
  75 +sparatoria
  76 +stalking
  77 +stuprata
  78 +stupro
  79 +suicidio
  80 +swat
  81 +talebani
  82 +task
  83 +terremoto
  84 +terrorismo
  85 +tortura
  86 +tossico
  87 +ultras
  88 +vendola
  89 +violenza
  90 +virus
... ...