Commit 8c61b7c34844934fc3c577edb1ce5dfdea7a5a12
1 parent
769fad15
some unicode fix when reading the file
Showing
3 changed files
with
2 additions
and
3 deletions
Show diff stats
merge_wordcount_with_edge_features.py
@@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): | @@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): | ||
110 | inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") | 110 | inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") |
111 | for line in inputFile: | 111 | for line in inputFile: |
112 | lines += 1 | 112 | lines += 1 |
113 | - node = Node.parseString(line) | 113 | + node = Node.parseString(unicode(line)) |
114 | if node.nodeCount > minfreq: #!!! this restricts a lot the dataset | 114 | if node.nodeCount > minfreq: #!!! this restricts a lot the dataset |
115 | if node.description in tokenDict: | 115 | if node.description in tokenDict: |
116 | # update node | 116 | # update node |
prepare_plot_scatter_2_distrib.py
@@ -13,8 +13,6 @@ we do a scatterplot | @@ -13,8 +13,6 @@ we do a scatterplot | ||
13 | import argparse | 13 | import argparse |
14 | import codecs | 14 | import codecs |
15 | import logging | 15 | import logging |
16 | -import os | ||
17 | -import sys | ||
18 | from collections import defaultdict | 16 | from collections import defaultdict |
19 | 17 | ||
20 | stopwords = open('./resources/stop-word-list.txt', 'r').read().decode('utf-8').split('\r\n') | 18 | stopwords = open('./resources/stop-word-list.txt', 'r').read().decode('utf-8').split('\r\n') |