Commit 47b06752b620786abf2ffa09e70ed009110fbb0f

Authored by Cristina Muntean
1 parent 8c61b7c3

Store the node description as a unicode string in Node (no longer JSON-escaped)

Showing 2 changed files with 4 additions and 3 deletions   Show diff stats
graph/Node.py
... ... @@ -14,7 +14,8 @@ class Node:
14 14  
15 15 def __init__(self, node_id, description):
16 16 self.id = node_id # it's an int
17   - self.description = json.dumps(description).replace('"', '')
  17 + # self.description = json.dumps(description).replace('"', '')
  18 + self.description = description
18 19 self.nodeCount = 0
19 20 self.edgeCount = 0
20 21 self.mentionCount = 0
... ... @@ -82,7 +83,7 @@ class Node:
82 83 print "There's a problem with the column number"
83 84 return None
84 85 nodeId = int(dataList[0])
85   - nodeDesc = dataList[1]
  86 + nodeDesc = unicode(dataList[1])
86 87 nodeCount = int(dataList[2])
87 88 nodeEdgeCount = int(dataList[3])
88 89 nodeMentionCount = int(dataList[4])
... ...
merge_wordcount_with_edge_features.py
... ... @@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq):
110 110 inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8")
111 111 for line in inputFile:
112 112 lines += 1
113   - node = Node.parseString(unicode(line))
  113 + node = Node.parseString(line)
114 114 if node.nodeCount > minfreq: #!!! this restricts a lot the dataset
115 115 if node.description in tokenDict:
116 116 # update node
... ...