Commit cf2ef076f78fee691e3588b0531a327e2eb1dce0

Authored by Cristina Muntean
1 parent 6a47cd9a

fixed mac_distrib

Showing 1 changed file with 27 additions and 27 deletions   Show diff stats
prepare_plot_scatter_2_distrib.py
... ... @@ -58,10 +58,8 @@ def readFromFileMultipleEdges(filename, columnNumber):
58 58 # read first line to get max
59 59 word = rowDataCols[1]
60 60 counter = int(rowDataCols[columnNumber-1]) # the index starts at 0 so column 9 corresponds to 8 in the array index
61   - print type(counter), counter
62 61 docList.append(tuple([word, counter]))
63 62 max_in_distrib = counter
64   - print type(max_in_distrib), max_in_distrib
65 63  
66 64 # read other lines
67 65 for line in f:
... ... @@ -72,6 +70,8 @@ def readFromFileMultipleEdges(filename, columnNumber):
72 70 docList.append(tuple([word, counter]))
73 71 if counter > max_in_distrib:
74 72 max_in_distrib = counter
  73 + else:
  74 + print "Num columns out of range", line
75 75 return docList, max_in_distrib
76 76  
77 77  
... ... @@ -96,30 +96,30 @@ if __name__ == '__main__':
96 96 print len(a), len(b)
97 97 print "Maxes: ", a_max, b_max
98 98  
99   - # # make b a default dict as we search for elements from a
100   - # bDict = {rows[0]: int(rows[1]) for rows in b}
101   - # bDict = defaultdict(int, bDict)
102   - #
103   - # # initialize the lists of elements selected
104   - # X = list()
105   - # Y = list()
106   - #
107   - # # populate the lists with the subset of k values
108   - # # now we normalize
109   - # local_topics = []
110   - # for word, counter in a[:k]:
111   - # X.append(int(counter)/a_max)
112   - # Y.append(int(bDict[word])/b_max)
113   - # if int(bDict[word]) < 0.25 * float(counter):
114   - # local_topics.append(word)
115   - # labels = [row[0] for row in a[:k]]
116   - #
117   - # print local_topics
118   - # print len(X), len(Y), len(labels)
119   - #
120   - # # write files for plots
121   - # for (x,y,label) in zip(X,Y,labels):
122   - # dataFile.write("{}\t{}\t{}\n".format(str(x),str(y),label))
123   - # dataFile.close()
  99 + # make b a default dict as we search for elements from a
  100 + bDict = {rows[0]: int(rows[1]) for rows in b}
  101 + bDict = defaultdict(int, bDict)
  102 +
  103 + # initialize the lists of elements selected
  104 + X = list()
  105 + Y = list()
  106 +
  107 + # populate the lists with the subset of k values
  108 + # now we normalize
  109 + local_topics = []
  110 + for word, counter in a[:k]:
  111 + X.append(int(counter)/a_max)
  112 + Y.append(int(bDict[word])/b_max)
  113 + if int(bDict[word]) < 0.25 * float(counter):
  114 + local_topics.append(word)
  115 + labels = [row[0] for row in a[:k]]
  116 +
  117 + print local_topics
  118 + print len(X), len(Y), len(labels)
  119 +
  120 + # write files for plots
  121 + for (x,y,label) in zip(X,Y,labels):
  122 + dataFile.write("{}\t{}\t{}\n".format(str(x),str(y),label))
  123 + dataFile.close()
124 124  
125 125  
... ...