Commit 47b06752b620786abf2ffa09e70ed009110fbb0f
1 parent
8c61b7c3
put unicode description in node
Showing 2 changed files with 4 additions and 3 deletions
Show diff stats
graph/Node.py
@@ -14,7 +14,8 @@ class Node: | @@ -14,7 +14,8 @@ class Node: | ||
14 | 14 | ||
15 | def __init__(self, node_id, description): | 15 | def __init__(self, node_id, description): |
16 | self.id = node_id # it's an int | 16 | self.id = node_id # it's an int |
17 | - self.description = json.dumps(description).replace('"', '') | 17 | + # self.description = json.dumps(description).replace('"', '') |
18 | + self.description = description | ||
18 | self.nodeCount = 0 | 19 | self.nodeCount = 0 |
19 | self.edgeCount = 0 | 20 | self.edgeCount = 0 |
20 | self.mentionCount = 0 | 21 | self.mentionCount = 0 |
@@ -82,7 +83,7 @@ class Node: | @@ -82,7 +83,7 @@ class Node: | ||
82 | print "There's a problem with the column number" | 83 | print "There's a problem with the column number" |
83 | return None | 84 | return None |
84 | nodeId = int(dataList[0]) | 85 | nodeId = int(dataList[0]) |
85 | - nodeDesc = dataList[1] | 86 | + nodeDesc = unicode(dataList[1]) |
86 | nodeCount = int(dataList[2]) | 87 | nodeCount = int(dataList[2]) |
87 | nodeEdgeCount = int(dataList[3]) | 88 | nodeEdgeCount = int(dataList[3]) |
88 | nodeMentionCount = int(dataList[4]) | 89 | nodeMentionCount = int(dataList[4]) |
merge_wordcount_with_edge_features.py
@@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): | @@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): | ||
110 | inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") | 110 | inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") |
111 | for line in inputFile: | 111 | for line in inputFile: |
112 | lines += 1 | 112 | lines += 1 |
113 | - node = Node.parseString(unicode(line)) | 113 | + node = Node.parseString(line) |
114 | if node.nodeCount > minfreq: #!!! this restricts a lot the dataset | 114 | if node.nodeCount > minfreq: #!!! this restricts a lot the dataset |
115 | if node.description in tokenDict: | 115 | if node.description in tokenDict: |
116 | # update node | 116 | # update node |