Commit 2a605d23c5927f2a859307d5032848749a93bd38

Authored by Cristina Muntean
1 parent 0cc8b816

added new scripts

Showing 2 changed files with 46 additions and 0 deletions   Show diff stats
gen-features-aggregates.sh 0 → 100755
#!/bin/bash
#
# Runs wordcount_from_json_list_with_edge_features.py once for every selected
# file in INPUT_DIR. For an input whose base name (up to the first '.') is
# NAME, two files are written to OUTPUT_DIR:
#   NAME.tsv                 - output path handed to the Python script
#   stats-NAME-WCEF.txt      - stdout and stderr of the run (incl. timing)
#
# The Python script is referenced by a relative path, so this script has to be
# started from the directory that contains it.
#
# Exit status: 0 when every run succeeded (or nothing matched the pattern),
# 1 when the output directory could not be created or any run failed.

# Only referenced by the disabled GNU parallel `sem` hint inside the loop.
CORES=16
INPUT_DIR=/data/muntean/filter-10-cities-november-tweets
OUTPUT_DIR=/data/muntean/edge-features-10-cities-november
# Kept as an array so every word stays a separate argument. Because `time`
# is reached through an expansion, bash runs the external `time` utility
# (not the shell keyword); its report goes to the command's stderr.
COMMAND=(time python wordcount_from_json_list_with_edge_features.py)

# An unmatched pattern expands to nothing instead of the literal pattern.
shopt -s nullglob

inputs=("$INPUT_DIR"/*of3*)      # this is for 10 day slots
#inputs=("$INPUT_DIR"/*_month*)  # this is for entire month

if (( ${#inputs[@]} == 0 )); then
  printf 'warning: no input files found in %s\n' "$INPUT_DIR" >&2
  exit 0
fi

mkdir -p -- "$OUTPUT_DIR" || exit 1

failures=0
for input in "${inputs[@]}"; do
  # Base name up to the first '.', e.g. /x/foo.bar.json -> foo
  name=${input##*/}
  name=${name%%.*}

  # To parallelise, prefix the command with `sem -j "$CORES"` and add
  # `sem --wait` after the loop (requires GNU parallel).
  #
  # Redirection order matters: stdout is pointed at the stats file first and
  # stderr is then duplicated onto it, so both streams land in the file.
  if ! "${COMMAND[@]}" "$input" "$OUTPUT_DIR/$name.tsv" \
      > "$OUTPUT_DIR/stats-${name}-WCEF.txt" 2>&1; then
    printf 'warning: processing failed for %s\n' "$input" >&2
    failures=$((failures + 1))
  fi
done

if (( failures > 0 )); then
  printf 'error: %d input file(s) failed\n' "$failures" >&2
  exit 1
fi
exit 0
... ...
sort-on-feat-aggregates.sh 0 → 100755
#!/bin/bash
#
# Sorts every .tsv file in INPUT_DIR numerically, in descending order, on a
# single tab-separated column. For an input whose base name (up to the first
# '.') is NAME the result is written to
#   INPUT_DIR/NAME-sorted-col-COLUMN.tsv
#
# Usage: sort-on-feat-aggregates.sh [COLUMN]
#   COLUMN  1-based column number to sort on (default: 4, edgeCount)
#
# Exit status: 0 when every file was sorted, 1 when any sort failed,
# 2 when COLUMN is not a positive integer.

#columns:
#1 id
#2 description
#3 nodeCount
#4 edgeCount
#5 mentionCount
#6 replyCount
#7 RTCount
#8 innerRTCount
#9 outerRTCount
#10 quoteCount
#11 innerQuoteCount
#12 outerQuoteCount

INPUT_DIR=/data/muntean/edge-features-10-cities-november
COLUMN=${1:-4}

if [[ ! $COLUMN =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: COLUMN must be a positive integer, got "%s"\n' "$COLUMN" >&2
  exit 2
fi

# An unmatched pattern expands to nothing instead of the literal pattern.
shopt -s nullglob

failures=0
for input in "$INPUT_DIR"/*.tsv; do
  # Results are written into INPUT_DIR itself; skip the output of earlier
  # runs so that re-running does not sort already sorted files again.
  if [[ ${input##*/} == *-sorted-col-* ]]; then
    continue
  fi

  # Base name up to the first '.', e.g. /x/foo.tsv -> foo
  name=${input##*/}
  name=${name%%.*}

  # -kN,N limits the sort key to exactly that one column.
  if ! sort -t$'\t' -n -r -k"${COLUMN},${COLUMN}" -- "$input" \
      > "$INPUT_DIR/${name}-sorted-col-${COLUMN}.tsv"; then
    printf 'warning: sorting failed for %s\n' "$input" >&2
    failures=$((failures + 1))
  fi
done

if (( failures > 0 )); then
  printf 'error: %d file(s) could not be sorted\n' "$failures" >&2
  exit 1
fi
exit 0
... ...