diff --git a/graph/Node.py b/graph/Node.py index 9aad726..5c4a05d 100644 --- a/graph/Node.py +++ b/graph/Node.py @@ -14,7 +14,8 @@ class Node: def __init__(self, node_id, description): self.id = node_id # it's an int - self.description = json.dumps(description).replace('"', '') + # self.description = json.dumps(description).replace('"', '') + self.description = description self.nodeCount = 0 self.edgeCount = 0 self.mentionCount = 0 @@ -82,7 +83,7 @@ class Node: print "There's a problem with the column number" return None nodeId = int(dataList[0]) - nodeDesc = dataList[1] + nodeDesc = unicode(dataList[1]) nodeCount = int(dataList[2]) nodeEdgeCount = int(dataList[3]) nodeMentionCount = int(dataList[4]) diff --git a/merge_wordcount_with_edge_features.py b/merge_wordcount_with_edge_features.py index b111b97..169d77b 100644 --- a/merge_wordcount_with_edge_features.py +++ b/merge_wordcount_with_edge_features.py @@ -110,7 +110,7 @@ def mergeBlackList(city, cityFile, minfreq): inputFile = codecs.open(fileDir+"/"+filename, "r", "utf-8") for line in inputFile: lines += 1 - node = Node.parseString(unicode(line)) + node = Node.parseString(line) if node.nodeCount > minfreq: #!!! this restricts a lot the dataset if node.description in tokenDict: # update node -- libgit2 0.21.4