Commit 47b06752b620786abf2ffa09e70ed009110fbb0f

Authored by Cristina Muntean
1 parent 8c61b7c3

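Put unicode description in node: store the description unmodified in Node (no JSON escaping) and convert it to unicode while parsing the node fields instead of at the call site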

Showing 2 changed files with 4 additions and 3 deletions   Show diff stats
@@ -14,7 +14,8 @@ class Node: @@ -14,7 +14,8 @@ class Node:
14 14
15 def __init__(self, node_id, description): 15 def __init__(self, node_id, description):
16 self.id = node_id # it's an int 16 self.id = node_id # it's an int
17 - self.description = json.dumps(description).replace('"', '') 17 + # self.description = json.dumps(description).replace('"', '')
  18 + self.description = description
18 self.nodeCount = 0 19 self.nodeCount = 0
19 self.edgeCount = 0 20 self.edgeCount = 0
20 self.mentionCount = 0 21 self.mentionCount = 0
@@ -82,7 +83,7 @@ class Node: @@ -82,7 +83,7 @@ class Node:
82 print "There's a problem with the column number" 83 print "There's a problem with the column number"
83 return None 84 return None
84 nodeId = int(dataList[0]) 85 nodeId = int(dataList[0])
85 - nodeDesc = dataList[1] 86 + nodeDesc = unicode(dataList[1])
86 nodeCount = int(dataList[2]) 87 nodeCount = int(dataList[2])
87 nodeEdgeCount = int(dataList[3]) 88 nodeEdgeCount = int(dataList[3])
88 nodeMentionCount = int(dataList[4]) 89 nodeMentionCount = int(dataList[4])
merge_wordcount_with_edge_features.py
@@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): @@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq):
110 inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") 110 inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8")
111 for line in inputFile: 111 for line in inputFile:
112 lines += 1 112 lines += 1
113 - node = Node.parseString(unicode(line)) 113 + node = Node.parseString(line)
114 if node.nodeCount > minfreq: #!!! this restricts a lot the dataset 114 if node.nodeCount > minfreq: #!!! this restricts a lot the dataset
115 if node.description in tokenDict: 115 if node.description in tokenDict:
116 # update node 116 # update node